From 0f99956417425ef20e5592781e3b6335ea4f3f37 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Wed, 13 Mar 2013 14:36:27 -0400 Subject: [PATCH 001/118] Added Exec, Dir, and ActiveHTTP modules. --- scripts/base/init-default.bro | 3 + scripts/base/utils/active-http.bro | 120 +++++++++++++++++ scripts/base/utils/dir.bro | 51 +++++++ scripts/base/utils/exec.bro | 207 +++++++++++++++++++++++++++++ 4 files changed, 381 insertions(+) create mode 100644 scripts/base/utils/active-http.bro create mode 100644 scripts/base/utils/dir.bro create mode 100644 scripts/base/utils/exec.bro diff --git a/scripts/base/init-default.bro b/scripts/base/init-default.bro index 8b36899f10..9b62c80014 100644 --- a/scripts/base/init-default.bro +++ b/scripts/base/init-default.bro @@ -5,9 +5,12 @@ ##! you actually want. @load base/utils/site +@load base/utils/active-http @load base/utils/addrs @load base/utils/conn-ids +@load base/utils/dir @load base/utils/directions-and-hosts +@load base/utils/exec @load base/utils/files @load base/utils/numbers @load base/utils/paths diff --git a/scripts/base/utils/active-http.bro b/scripts/base/utils/active-http.bro new file mode 100644 index 0000000000..5522cc108a --- /dev/null +++ b/scripts/base/utils/active-http.bro @@ -0,0 +1,120 @@ +##! A module for performing active HTTP requests and +##! getting the reply at runtime. + +@load ./exec + +module ActiveHTTP; + +export { + ## The default timeout for HTTP requests. + const default_max_time = 1min &redef; + + ## The default HTTP method/verb to use for requests. + const default_method = "GET" &redef; + + type Response: record { + ## Numeric response code from the server. + code: count; + ## String response messgae from the server. + msg: string; + ## Full body of the response. + body: string &optional; + ## All headers returned by the server. + headers: table[string] of string &optional; + }; + + type Request: record { + ## The URL being requested. + url: string; + ## The HTTP method/verb to use for the request. + method: string &default=default_method; + ## Data to send to the server in the client body. Keep in + ## mind that you will probably need to set the $method field + ## to "POST" or "PUT". + client_data: string &optional; + ## Arbitrary headers to pass to the server. Some headers + ## will be included by libCurl. + #custom_headers: table[string] of string &optional; + ## Timeout for the request. + max_time: interval &default=default_max_time; + ## Additional curl command line arguments. Be very careful + ## with this option since shell injection could take place + ## if careful handling of untrusted data is not applied. + addl_curl_args: string &optional; + }; + + ## Perform an HTTP request according to the :bro:type:`Request` record. + ## This is an asynchronous function and must be called within a "when" + ## statement. + ## + ## req: A record instance representing all options for an HTTP request. + ## + ## Returns: A record with the full response message. 
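+	##
+	## For illustration (sketch added for clarity, not part of the original
+	## patch; the URL below is a placeholder): a typical call looks like
+	##
+	##     when ( local resp = ActiveHTTP::request([$url="http://example.com/"]) )
+	##         print resp$code, resp$msg;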
+ global request: function(req: ActiveHTTP::Request): ActiveHTTP::Response; +} + +function request2curl(r: Request, bodyfile: string, headersfile: string): string + { + local cmd = fmt("curl -s -g -o \"%s\" -D \"%s\" -X \"%s\"", + str_shell_escape(bodyfile), + str_shell_escape(headersfile), + str_shell_escape(r$method)); + + cmd = fmt("%s -m %.0f", cmd, r$max_time); + + if ( r?$client_data ) + cmd = fmt("%s -d -", cmd); + + if ( r?$addl_curl_args ) + cmd = fmt("%s %s", cmd, r$addl_curl_args); + + cmd = fmt("%s \"%s\"", cmd, str_shell_escape(r$url)); + return cmd; + } + +function request(req: Request): ActiveHTTP::Response + { + local tmpfile = "/tmp/bro-activehttp-" + unique_id(""); + local bodyfile = fmt("%s_body", tmpfile); + local headersfile = fmt("%s_headers", tmpfile); + + local cmd = request2curl(req, bodyfile, headersfile); + local stdin_data = req?$client_data ? req$client_data : ""; + + local resp: Response; + resp$code = 0; + resp$msg = ""; + resp$body = ""; + resp$headers = table(); + return when ( local result = Exec::run([$cmd=cmd, $stdin=stdin_data, $read_files=set(bodyfile, headersfile)]) ) + { + # If there is no response line then nothing else will work either. + if ( ! (result?$files && headersfile in result$files) ) + Reporter::error(fmt("There was a failure when requesting \"%s\" with ActiveHTTP.", req$url)); + + local headers = result$files[headersfile]; + for ( i in headers ) + { + # The reply is the first line. + if ( i == 0 ) + { + local response_line = split_n(headers[0], /[[:blank:]]+/, F, 2); + if ( |response_line| != 3 ) + return resp; + + resp$code = to_count(response_line[2]); + resp$msg = response_line[3]; + resp$body = join_string_vec(result$files[bodyfile], ""); + } + else + { + local line = headers[i]; + local h = split1(line, /:/); + if ( |h| != 2 ) + next; + resp$headers[h[1]] = sub_bytes(h[2], 0, |h[2]|-1); + } + } + return resp; + } + } diff --git a/scripts/base/utils/dir.bro b/scripts/base/utils/dir.bro new file mode 100644 index 0000000000..2ed1c8e6e9 --- /dev/null +++ b/scripts/base/utils/dir.bro @@ -0,0 +1,51 @@ +@load base/utils/exec +@load base/frameworks/reporter +@load base/utils/paths + +module Dir; + +export { + ## Register a directory to monitor with a callback that is called + ## every time a previously unseen file is seen. If a file is deleted + ## and seen to be gone, the file is available for being seen again in + ## the future. + ## + ## dir: The directory to monitor for files. + ## + ## callback: Callback that gets executed with each file name + ## that is found. Filenames are provided with the full path. + global monitor: function(dir: string, callback: function(fname: string)); + + ## The interval this module checks for files in directories when using + ## the :bro:see:`Dir::monitor` function. 
+ const polling_interval = 30sec &redef; +} + +event Dir::monitor_ev(dir: string, last_files: set[string], callback: function(fname: string)) + { + when ( local result = Exec::run([$cmd=fmt("ls \"%s\"", str_shell_escape(dir))]) ) + { + if ( result$exit_code != 0 ) + { + Reporter::warning("Requested monitoring of non-existent directory."); + return; + } + + local current_files: set[string] = set(); + local files = result$stdout; + for ( i in files ) + { + if ( files[i] !in last_files ) + callback(build_path_compressed(dir, files[i])); + add current_files[files[i]]; + } + schedule polling_interval { Dir::monitor_ev(dir, current_files, callback) }; + } + } + +function monitor(dir: string, callback: function(fname: string)) + { + event Dir::monitor_ev(dir, set(), callback); + } + + diff --git a/scripts/base/utils/exec.bro b/scripts/base/utils/exec.bro new file mode 100644 index 0000000000..fe353cf590 --- /dev/null +++ b/scripts/base/utils/exec.bro @@ -0,0 +1,207 @@ +##! A module for executing external command line programs. +##! This requires code that is still in topic branches and +##! definitely won't currently work on any released version of Bro. + +@load base/frameworks/input + +module Exec; + +export { + type Command: record { + ## The command line to execute. + ## Use care to avoid injection attacks! + cmd: string; + ## Provide standard in to the program as a + ## string. + stdin: string &default=""; + ## If additional files are required to be read + ## in as part of the output of the command they + ## can be defined here. + read_files: set[string] &optional; + }; + + type Result: record { + ## Exit code from the program. + exit_code: count &default=0; + ## Each line of standard out. + stdout: vector of string &optional; + ## Each line of standard error. + stderr: vector of string &optional; + ## If additional files were requested to be read in + ## the content of the files will be available here. + files: table[string] of string_vec &optional; + }; + + ## Function for running command line programs and getting + ## output. This is an asynchronous function which is meant + ## to be run with the `when` statement. + ## + ## cmd: The command to run. Use care to avoid injection attacks! + ## + ## returns: A record representing the full results from the + ## external program execution. + global run: function(cmd: Command): Result; +} + +redef record Command += { + # The prefix name for tracking temp files. + prefix_name: string &optional; +}; + +global results: table[string] of Result = table(); +global finished_commands: set[string]; +global tmp_files: set[string] = set(); + +type OneLine: record { line: string; }; + +event Exec::stdout_line(description: Input::EventDescription, tpe: Input::Event, s: string) + { + local name = sub(description$name, /_[^_]*$/, ""); + + local result = results[name]; + if ( ! results[name]?$stdout ) + result$stdout = vector(s); + else + result$stdout[|result$stdout|] = s; + } + +event Exec::stderr_line(description: Input::EventDescription, tpe: Input::Event, s: string) + { + local name = sub(description$name, /_[^_]*$/, ""); + + local result = results[name]; + if ( ! results[name]?$stderr ) + result$stderr = vector(s); + else + result$stderr[|result$stderr|] = s; + } + +event Exec::file_line(description: Input::EventDescription, tpe: Input::Event, s: string) + { + local parts = split1(description$name, /_/); + local name = parts[1]; + local track_file = parts[2]; + + local result = results[name]; + if ( ! 
result?$files ) + result$files = table(); + + if ( track_file !in result$files ) + result$files[track_file] = vector(s); + else + result$files[track_file][|result$files[track_file]|] = s; + } + +event Exec::cleanup_and_do_callback(name: string) + { + Input::remove(fmt("%s_stdout", name)); + system(fmt("rm %s_stdout", name)); + delete tmp_files[fmt("%s_stdout", name)]; + + Input::remove(fmt("%s_stderr", name)); + system(fmt("rm %s_stderr", name)); + delete tmp_files[fmt("%s_stderr", name)]; + + Input::remove(fmt("%s_done", name)); + system(fmt("rm %s_done", name)); + delete tmp_files[fmt("%s_done", name)]; + + # Indicate to the "when" async watcher that this command is done. + add finished_commands[name]; + } + +event Exec::run_done(description: Input::EventDescription, tpe: Input::Event, s: string) + { + local name = sub(description$name, /_[^_]*$/, ""); + + if ( /^exit_code:/ in s ) + results[name]$exit_code = to_count(split1(s, /:/)[2]); + else if ( s == "done" ) + # Wait one second to allow all threads to read all of their input + # and forward it. + schedule 1sec { Exec::cleanup_and_do_callback(name) }; + } + +event Exec::start_watching_files(cmd: Command) + { + Input::add_event([$source=fmt("%s_done", cmd$prefix_name), + $name=fmt("%s_done", cmd$prefix_name), + $reader=Input::READER_RAW, + $mode=Input::STREAM, + $want_record=F, + $fields=OneLine, + $ev=Exec::run_done]); + + Input::add_event([$source=fmt("%s_stdout", cmd$prefix_name), + $name=fmt("%s_stdout", cmd$prefix_name), + $reader=Input::READER_RAW, + $mode=Input::STREAM, + $want_record=F, + $fields=OneLine, + $ev=Exec::stdout_line]); + + Input::add_event([$source=fmt("%s_stderr", cmd$prefix_name), + $name=fmt("%s_stderr", cmd$prefix_name), + $reader=Input::READER_RAW, + $mode=Input::STREAM, + $want_record=F, + $fields=OneLine, + $ev=Exec::stderr_line]); + + if ( cmd?$read_files ) + { + for ( read_file in cmd$read_files ) + { + Input::add_event([$source=fmt("%s", read_file), + $name=fmt("%s_%s", cmd$prefix_name, read_file), + $reader=Input::READER_RAW, + $mode=Input::STREAM, + $want_record=F, + $fields=OneLine, + $ev=Exec::file_line]); + } + } + } + +function run(cmd: Command): Result + { + cmd$prefix_name = "/tmp/bro-exec-" + unique_id(""); + system(fmt("touch %s_done %s_stdout %s_stderr 2>/dev/null", cmd$prefix_name, cmd$prefix_name, cmd$prefix_name)); + add tmp_files[fmt("%s_done", cmd$prefix_name)]; + add tmp_files[fmt("%s_stdout", cmd$prefix_name)]; + add tmp_files[fmt("%s_stderr", cmd$prefix_name)]; + + if ( cmd?$read_files ) + { + for ( read_file in cmd$read_files ) + { + system(fmt("touch %s 2>/dev/null", read_file)); + add tmp_files[read_file]; + } + } + + piped_exec(fmt("%s 2>> %s_stderr 1>> %s_stdout; echo \"exit_code:${?}\" >> %s_done; echo \"done\" >> %s_done", + cmd$cmd, cmd$prefix_name, cmd$prefix_name, cmd$prefix_name, cmd$prefix_name), + cmd$stdin); + + results[cmd$prefix_name] = []; + + schedule 1msec { Exec::start_watching_files(cmd) }; + + return when ( cmd$prefix_name in finished_commands ) + { + delete finished_commands[cmd$prefix_name]; + local result = results[cmd$prefix_name]; + delete results[cmd$prefix_name]; + return result; + } + } + +event bro_done() + { + # We are punting here and just deleting any files that haven't been processed yet. 
+ for ( fname in tmp_files ) + { + system(fmt("rm \"%s\"", str_shell_escape(fname))); + } + } \ No newline at end of file From 035b668f7398cd4b803c9ecc455ce58203de666b Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Mon, 22 Apr 2013 21:52:21 -0400 Subject: [PATCH 002/118] Updates to use new input framework mechanism to execute command line programs. --- scripts/base/utils/exec.bro | 160 ++++++++++++++---------------------- 1 file changed, 60 insertions(+), 100 deletions(-) diff --git a/scripts/base/utils/exec.bro b/scripts/base/utils/exec.bro index fe353cf590..45cd8cb287 100644 --- a/scripts/base/utils/exec.bro +++ b/scripts/base/utils/exec.bro @@ -23,6 +23,8 @@ export { type Result: record { ## Exit code from the program. exit_code: count &default=0; + ## True if the command was terminated with a signal. + signal_exit: bool &default=F; ## Each line of standard out. stdout: vector of string &optional; ## Each line of standard error. @@ -41,39 +43,45 @@ export { ## returns: A record representing the full results from the ## external program execution. global run: function(cmd: Command): Result; + + ## The system directory for temp files. + const tmp_dir = "/tmp" &redef; } redef record Command += { - # The prefix name for tracking temp files. - prefix_name: string &optional; + # The unique id for tracking executors. + uid: string &optional; }; global results: table[string] of Result = table(); global finished_commands: set[string]; -global tmp_files: set[string] = set(); +global currently_tracked_files: set[string] = set(); +type OneLine: record { + s: string; + is_stderr: bool; +}; -type OneLine: record { line: string; }; +type FileLine: record { + s: string; +}; -event Exec::stdout_line(description: Input::EventDescription, tpe: Input::Event, s: string) +event Exec::line(description: Input::EventDescription, tpe: Input::Event, s: string, is_stderr: bool) { - local name = sub(description$name, /_[^_]*$/, ""); - - local result = results[name]; - if ( ! results[name]?$stdout ) - result$stdout = vector(s); + local result = results[description$name]; + if ( is_stderr ) + { + if ( ! result?$stderr ) + result$stderr = vector(s); + else + result$stderr[|result$stderr|] = s; + } else - result$stdout[|result$stdout|] = s; - } - -event Exec::stderr_line(description: Input::EventDescription, tpe: Input::Event, s: string) - { - local name = sub(description$name, /_[^_]*$/, ""); - - local result = results[name]; - if ( ! results[name]?$stderr ) - result$stderr = vector(s); - else - result$stderr[|result$stderr|] = s; + { + if ( ! 
result?$stdout ) + result$stdout = vector(s); + else + result$stdout[|result$stdout|] = s; + } } event Exec::file_line(description: Input::EventDescription, tpe: Input::Event, s: string) @@ -92,107 +100,59 @@ event Exec::file_line(description: Input::EventDescription, tpe: Input::Event, s result$files[track_file][|result$files[track_file]|] = s; } -event Exec::cleanup_and_do_callback(name: string) +event InputRaw::process_finished(name: string, source:string, exit_code:count, signal_exit:bool) { - Input::remove(fmt("%s_stdout", name)); - system(fmt("rm %s_stdout", name)); - delete tmp_files[fmt("%s_stdout", name)]; - - Input::remove(fmt("%s_stderr", name)); - system(fmt("rm %s_stderr", name)); - delete tmp_files[fmt("%s_stderr", name)]; - - Input::remove(fmt("%s_done", name)); - system(fmt("rm %s_done", name)); - delete tmp_files[fmt("%s_done", name)]; + results[name]$exit_code = exit_code; + results[name]$signal_exit = signal_exit; + Input::remove(name); # Indicate to the "when" async watcher that this command is done. add finished_commands[name]; } -event Exec::run_done(description: Input::EventDescription, tpe: Input::Event, s: string) +event Exec::start_watching_file(uid: string, read_file: string) { - local name = sub(description$name, /_[^_]*$/, ""); - - if ( /^exit_code:/ in s ) - results[name]$exit_code = to_count(split1(s, /:/)[2]); - else if ( s == "done" ) - # Wait one second to allow all threads to read all of their input - # and forward it. - schedule 1sec { Exec::cleanup_and_do_callback(name) }; - } - -event Exec::start_watching_files(cmd: Command) - { - Input::add_event([$source=fmt("%s_done", cmd$prefix_name), - $name=fmt("%s_done", cmd$prefix_name), + Input::add_event([$source=fmt("%s", read_file), + $name=fmt("%s_%s", uid, read_file), $reader=Input::READER_RAW, $mode=Input::STREAM, $want_record=F, - $fields=OneLine, - $ev=Exec::run_done]); - - Input::add_event([$source=fmt("%s_stdout", cmd$prefix_name), - $name=fmt("%s_stdout", cmd$prefix_name), - $reader=Input::READER_RAW, - $mode=Input::STREAM, - $want_record=F, - $fields=OneLine, - $ev=Exec::stdout_line]); - - Input::add_event([$source=fmt("%s_stderr", cmd$prefix_name), - $name=fmt("%s_stderr", cmd$prefix_name), - $reader=Input::READER_RAW, - $mode=Input::STREAM, - $want_record=F, - $fields=OneLine, - $ev=Exec::stderr_line]); - - if ( cmd?$read_files ) - { - for ( read_file in cmd$read_files ) - { - Input::add_event([$source=fmt("%s", read_file), - $name=fmt("%s_%s", cmd$prefix_name, read_file), - $reader=Input::READER_RAW, - $mode=Input::STREAM, - $want_record=F, - $fields=OneLine, - $ev=Exec::file_line]); - } - } + $fields=FileLine, + $ev=Exec::file_line]); } function run(cmd: Command): Result { - cmd$prefix_name = "/tmp/bro-exec-" + unique_id(""); - system(fmt("touch %s_done %s_stdout %s_stderr 2>/dev/null", cmd$prefix_name, cmd$prefix_name, cmd$prefix_name)); - add tmp_files[fmt("%s_done", cmd$prefix_name)]; - add tmp_files[fmt("%s_stdout", cmd$prefix_name)]; - add tmp_files[fmt("%s_stderr", cmd$prefix_name)]; + cmd$uid = unique_id(""); + results[cmd$uid] = []; if ( cmd?$read_files ) { for ( read_file in cmd$read_files ) { - system(fmt("touch %s 2>/dev/null", read_file)); - add tmp_files[read_file]; + add currently_tracked_files[read_file]; + system(fmt("touch \"%s\" 2>/dev/null", str_shell_escape(read_file))); + schedule 1msec { Exec::start_watching_file(cmd$uid, read_file) }; } } - piped_exec(fmt("%s 2>> %s_stderr 1>> %s_stdout; echo \"exit_code:${?}\" >> %s_done; echo \"done\" >> %s_done", - cmd$cmd, 
cmd$prefix_name, cmd$prefix_name, cmd$prefix_name, cmd$prefix_name), - cmd$stdin); + local config_strings: table[string] of string = { + ["stdin"] = cmd$stdin, + ["read_stderr"] = "1", + }; + Input::add_event([$name=cmd$uid, + $source=fmt("%s |", cmd$cmd), + $reader=Input::READER_RAW, + $fields=Exec::OneLine, + $ev=Exec::line, + $want_record=F, + $config=config_strings]); - results[cmd$prefix_name] = []; - - schedule 1msec { Exec::start_watching_files(cmd) }; - - return when ( cmd$prefix_name in finished_commands ) + return when ( cmd$uid in finished_commands ) { - delete finished_commands[cmd$prefix_name]; - local result = results[cmd$prefix_name]; - delete results[cmd$prefix_name]; + delete finished_commands[cmd$uid]; + local result = results[cmd$uid]; + delete results[cmd$uid]; return result; } } @@ -200,7 +160,7 @@ function run(cmd: Command): Result event bro_done() { # We are punting here and just deleting any files that haven't been processed yet. - for ( fname in tmp_files ) + for ( fname in currently_tracked_files ) { system(fmt("rm \"%s\"", str_shell_escape(fname))); } From 08348b2bc29f0d4661fbe61be355716a3ee51a25 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Mon, 22 Apr 2013 21:53:00 -0400 Subject: [PATCH 003/118] Update to make Dir::monitor watch inodes instead of file names. --- scripts/base/utils/dir.bro | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/scripts/base/utils/dir.bro b/scripts/base/utils/dir.bro index 2ed1c8e6e9..b154fe000e 100644 --- a/scripts/base/utils/dir.bro +++ b/scripts/base/utils/dir.bro @@ -23,11 +23,11 @@ export { event Dir::monitor_ev(dir: string, last_files: set[string], callback: function(fname: string)) { - when ( local result = Exec::run([$cmd=fmt("ls \"%s\"", str_shell_escape(dir))]) ) + when ( local result = Exec::run([$cmd=fmt("ls -i \"%s/\"", str_shell_escape(dir))]) ) { if ( result$exit_code != 0 ) { - Reporter::warning("Requested monitoring of non-existent directory."); + Reporter::warning(fmt("Requested monitoring of non-existent directory (%s).", dir)); return; } @@ -35,9 +35,10 @@ event Dir::monitor_ev(dir: string, last_files: set[string], callback: function(f local files = result$stdout; for ( i in files ) { - if ( files[i] !in last_files ) - callback(build_path_compressed(dir, files[i])); - add current_files[files[i]]; + local parts = split1(files[i], / /); + if ( parts[1] !in last_files ) + callback(build_path_compressed(dir, parts[2])); + add current_files[parts[1]]; } schedule polling_interval { Dir::monitor_ev(dir, current_files, callback) }; } From 4d275522c7a87f8c69b1494126cc995a20b2d66b Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Thu, 23 May 2013 16:03:26 -0700 Subject: [PATCH 004/118] Add abstraction for vector of bits. A bitvector is a vector of bits with underlying block storage. Since C++ has no notion of lvalues in the context of bits, we use a small wrapper class Reference that masks the desired bit in the corresponding block. 
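
For illustration (sketch added as a reading aid, not part of the original
commit message), a minimal use of the new class through the public API
declared in BitVector.h below:

    #include "BitVector.h"

    BitVector v(64);              // 64 bits, all initialized to 0
    v.set(3);                     // set bit 3 via the named interface
    v[10] = true;                 // Reference proxy makes operator[] an lvalue
    v[10] |= false;               // compound assignment through the proxy
    bool b = v[3];                // reading a bit converts to bool
    BitVector::size_type ones = v.count();        // population count, here 2
    BitVector::size_type first = v.find_first();  // first 1-bit, here 3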
--- src/BitVector.cc | 455 +++++++++++++++++++++++++++++++++++++++++++++ src/BitVector.h | 324 ++++++++++++++++++++++++++++++++ src/CMakeLists.txt | 1 + 3 files changed, 780 insertions(+) create mode 100644 src/BitVector.cc create mode 100644 src/BitVector.h diff --git a/src/BitVector.cc b/src/BitVector.cc new file mode 100644 index 0000000000..2f714a6c79 --- /dev/null +++ b/src/BitVector.cc @@ -0,0 +1,455 @@ +#include "BitVector.h" + +#include +#include + +BitVector::size_type BitVector::npos = static_cast(-1); +BitVector::block_type BitVector::bits_per_block = + std::numeric_limits::digits; + +namespace { + +uint8_t count_table[] = { + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, + 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, + 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, + 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, + 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, + 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, + 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, + 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 2, 3, 3, 4, 3, 4, 4, 5, + 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 3, + 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, + 6, 7, 6, 7, 7, 8 +}; + +} // namespace + +BitVector::Reference::Reference(block_type& block, block_type i) + : block_(block), + mask_(block_type(1) << i) + { + assert(i < bits_per_block); + } + +BitVector::Reference& BitVector::Reference::flip() + { + block_ ^= mask_; + return *this; + } + +BitVector::Reference::operator bool() const + { + return (block_ & mask_) != 0; + } + +bool BitVector::Reference::operator~() const + { + return (block_ & mask_) == 0; + } + +BitVector::Reference& BitVector::Reference::operator=(bool x) + { + x ? block_ |= mask_ : block_ &= ~mask_; + return *this; + } + +BitVector::Reference& BitVector::Reference::operator=(Reference const& other) + { + other ? block_ |= mask_ : block_ &= ~mask_; + return *this; + } + +BitVector::Reference& BitVector::Reference::operator|=(bool x) + { + if (x) + block_ |= mask_; + return *this; + } + +BitVector::Reference& BitVector::Reference::operator&=(bool x) + { + if (! x) + block_ &= ~mask_; + return *this; + } + +BitVector::Reference& BitVector::Reference::operator^=(bool x) + { + if (x) + block_ ^= mask_; + return *this; + } + +BitVector::Reference& BitVector::Reference::operator-=(bool x) + { + if (x) + block_ &= ~mask_; + return *this; + } + + +BitVector::BitVector() : num_bits_(0) { } + +BitVector::BitVector(size_type size, bool value) + : bits_(bits_to_blocks(size), value ? 
~block_type(0) : 0), + num_bits_(size) +{ } + +BitVector::BitVector(BitVector const& other) + : bits_(other.bits_), + num_bits_(other.num_bits_) +{ } + +BitVector BitVector::operator~() const + { + BitVector b(*this); + b.flip(); + return b; + } + +BitVector& BitVector::operator=(BitVector const& other) + { + bits_ = other.bits_; + return *this; + } + +BitVector BitVector::operator<<(size_type n) const + { + BitVector b(*this); + return b <<= n; + } + +BitVector BitVector::operator>>(size_type n) const + { + BitVector b(*this); + return b >>= n; + } + +BitVector& BitVector::operator<<=(size_type n) + { + if (n >= num_bits_) + return reset(); + + if (n > 0) + { + size_type last = blocks() - 1; + size_type div = n / bits_per_block; + block_type r = bit_index(n); + block_type* b = &bits_[0]; + assert(blocks() >= 1); + assert(div <= last); + + if (r != 0) + { + for (size_type i = last - div; i > 0; --i) + b[i + div] = (b[i] << r) | (b[i - 1] >> (bits_per_block - r)); + b[div] = b[0] << r; + } + else + { + for (size_type i = last-div; i > 0; --i) + b[i + div] = b[i]; + b[div] = b[0]; + } + + std::fill_n(b, div, block_type(0)); + zero_unused_bits(); + } + + return *this; + } + +BitVector& BitVector::operator>>=(size_type n) + { + if (n >= num_bits_) + return reset(); + + if (n > 0) + { + size_type last = blocks() - 1; + size_type div = n / bits_per_block; + block_type r = bit_index(n); + block_type* b = &bits_[0]; + assert(blocks() >= 1); + assert(div <= last); + + if (r != 0) + { + for (size_type i = last - div; i > 0; --i) + b[i - div] = (b[i] >> r) | (b[i + 1] << (bits_per_block - r)); + b[last - div] = b[last] >> r; + } + else + { + for (size_type i = div; i <= last; ++i) + b[i-div] = b[i]; + } + + std::fill_n(b + (blocks() - div), div, block_type(0)); + } + return *this; + } + +BitVector& BitVector::operator&=(BitVector const& other) + { + assert(size() >= other.size()); + for (size_type i = 0; i < blocks(); ++i) + bits_[i] &= other.bits_[i]; + return *this; + } + +BitVector& BitVector::operator|=(BitVector const& other) + { + assert(size() >= other.size()); + for (size_type i = 0; i < blocks(); ++i) + bits_[i] |= other.bits_[i]; + return *this; + } + +BitVector& BitVector::operator^=(BitVector const& other) + { + assert(size() >= other.size()); + for (size_type i = 0; i < blocks(); ++i) + bits_[i] ^= other.bits_[i]; + return *this; + } + +BitVector& BitVector::operator-=(BitVector const& other) + { + assert(size() >= other.size()); + for (size_type i = 0; i < blocks(); ++i) + bits_[i] &= ~other.bits_[i]; + return *this; + } + +BitVector operator&(BitVector const& x, BitVector const& y) + { + BitVector b(x); + return b &= y; + } + +BitVector operator|(BitVector const& x, BitVector const& y) + { + BitVector b(x); + return b |= y; + } + +BitVector operator^(BitVector const& x, BitVector const& y) + { + BitVector b(x); + return b ^= y; + } + +BitVector operator-(BitVector const& x, BitVector const& y) + { + BitVector b(x); + return b -= y; + } + +bool operator==(BitVector const& x, BitVector const& y) + { + return x.num_bits_ == y.num_bits_ && x.bits_ == y.bits_; + } + +bool operator!=(BitVector const& x, BitVector const& y) + { + return ! 
(x == y); + } + +bool operator<(BitVector const& x, BitVector const& y) + { + assert(x.size() == y.size()); + for (BitVector::size_type r = x.blocks(); r > 0; --r) + { + BitVector::size_type i = r - 1; + if (x.bits_[i] < y.bits_[i]) + return true; + else if (x.bits_[i] > y.bits_[i]) + return false; + } + return false; + } + +void BitVector::resize(size_type n, bool value) + { + size_type old = blocks(); + size_type required = bits_to_blocks(n); + block_type block_value = value ? ~block_type(0) : block_type(0); + + if (required != old) + bits_.resize(required, block_value); + + if (value && (n > num_bits_) && extra_bits()) + bits_[old - 1] |= (block_value << extra_bits()); + + num_bits_ = n; + zero_unused_bits(); + } + +void BitVector::clear() + { + bits_.clear(); + num_bits_ = 0; + } + +void BitVector::push_back(bool bit) + { + size_type s = size(); + resize(s + 1); + set(s, bit); + } + +void BitVector::append(block_type block) + { + size_type excess = extra_bits(); + if (excess) + { + assert(! bits_.empty()); + bits_.push_back(block >> (bits_per_block - excess)); + bits_[bits_.size() - 2] |= (block << excess); + } + else + { + bits_.push_back(block); + } + num_bits_ += bits_per_block; + } + +BitVector& BitVector::set(size_type i, bool bit) + { + assert(i < num_bits_); + + if (bit) + bits_[block_index(i)] |= bit_mask(i); + else + reset(i); + + return *this; + } + +BitVector& BitVector::set() + { + std::fill(bits_.begin(), bits_.end(), ~block_type(0)); + zero_unused_bits(); + return *this; + } + +BitVector& BitVector::reset(size_type i) + { + assert(i < num_bits_); + bits_[block_index(i)] &= ~bit_mask(i); + return *this; + } + +BitVector& BitVector::reset() + { + std::fill(bits_.begin(), bits_.end(), block_type(0)); + return *this; + } + +BitVector& BitVector::flip(size_type i) + { + assert(i < num_bits_); + bits_[block_index(i)] ^= bit_mask(i); + return *this; + } + +BitVector& BitVector::flip() + { + for (size_type i = 0; i < blocks(); ++i) + bits_[i] = ~bits_[i]; + zero_unused_bits(); + return *this; + } + +bool BitVector::operator[](size_type i) const + { + assert(i < num_bits_); + return (bits_[block_index(i)] & bit_mask(i)) != 0; + } + +BitVector::Reference BitVector::operator[](size_type i) + { + assert(i < num_bits_); + return Reference(bits_[block_index(i)], bit_index(i)); + } + +BitVector::size_type BitVector::count() const + { + std::vector::const_iterator first = bits_.begin(); + size_t n = 0; + size_type length = blocks(); + while (length) + { + block_type block = *first; + while (block) + { + // TODO: use __popcnt if available. + n += count_table[block & ((1u << 8) - 1)]; + block >>= 8; + } + ++first; + --length; + } + return n; + } + +BitVector::size_type BitVector::blocks() const + { + return bits_.size(); + } + +BitVector::size_type BitVector::size() const + { + return num_bits_; + } + +bool BitVector::empty() const + { + return bits_.empty(); + } + +BitVector::size_type BitVector::find_first() const + { + return find_from(0); + } + +BitVector::size_type BitVector::find_next(size_type i) const + { + if (i >= (size() - 1) || size() == 0) + return npos; + ++i; + size_type bi = block_index(i); + block_type block = bits_[bi] & (~block_type(0) << bit_index(i)); + return block ? 
bi * bits_per_block + lowest_bit(block) : find_from(bi + 1); + } + +BitVector::size_type BitVector::lowest_bit(block_type block) + { + block_type x = block - (block & (block - 1)); + size_type log = 0; + while (x >>= 1) + ++log; + return log; + } + +BitVector::block_type BitVector::extra_bits() const + { + return bit_index(size()); + } + +void BitVector::zero_unused_bits() + { + if (extra_bits()) + bits_.back() &= ~(~block_type(0) << extra_bits()); + } + +BitVector::size_type BitVector::find_from(size_type i) const + { + while (i < blocks() && bits_[i] == 0) + ++i; + if (i >= blocks()) + return npos; + return i * bits_per_block + lowest_bit(bits_[i]); + } diff --git a/src/BitVector.h b/src/BitVector.h new file mode 100644 index 0000000000..46d7e2df8f --- /dev/null +++ b/src/BitVector.h @@ -0,0 +1,324 @@ +#ifndef BitVector_h +#define BitVector_h + +#include +#include + +/** + * A vector of bits. + */ +class BitVector { +public: + typedef size_t block_type; + typedef size_t size_type; + static size_type npos; + static block_type bits_per_block; + +public: + /** + * An lvalue proxy for single bits. + */ + class Reference { + friend class BitVector; + Reference(block_type& block, block_type i); + + public: + Reference& flip(); + operator bool() const; + bool operator~() const; + Reference& operator=(bool x); + Reference& operator=(Reference const& other); + Reference& operator|=(bool x); + Reference& operator&=(bool x); + Reference& operator^=(bool x); + Reference& operator-=(bool x); + + private: + void operator&(); + block_type& block_; + block_type const mask_; + }; + + typedef bool const_reference; + + /** + * Constructs an empty bit vector. + */ + BitVector(); + + /** + * Constructs a bit vector of a given size. + * @param size The number of bits. + * @param value The value for each bit. + */ + explicit BitVector(size_type size, bool value = false); + + /** + * Constructs a bit vector from a sequence of blocks. + */ + template + BitVector(InputIterator first, InputIterator last) + { + bits_.insert(bits_.end(), first, last); + num_bits_ = bits_.size() * bits_per_block; + } + + /** + * Copy-constructs a bit vector. + * @param other The bit vector to copy. + */ + BitVector(const BitVector& other); + + /** + * Assigns another bit vector to this instance. + * @param other The RHS of the assignment. + */ + BitVector& operator=(const BitVector& other); + + // + // Bitwise operations + // + BitVector operator~() const; + BitVector operator<<(size_type n) const; + BitVector operator>>(size_type n) const; + BitVector& operator<<=(size_type n); + BitVector& operator>>=(size_type n); + BitVector& operator&=(BitVector const& other); + BitVector& operator|=(BitVector const& other); + BitVector& operator^=(BitVector const& other); + BitVector& operator-=(BitVector const& other); + friend BitVector operator&(BitVector const& x, BitVector const& y); + friend BitVector operator|(BitVector const& x, BitVector const& y); + friend BitVector operator^(BitVector const& x, BitVector const& y); + friend BitVector operator-(BitVector const& x, BitVector const& y); + + // + // Relational operators + // + friend bool operator==(BitVector const& x, BitVector const& y); + friend bool operator!=(BitVector const& x, BitVector const& y); + friend bool operator<(BitVector const& x, BitVector const& y); + + // + // Basic operations + // + /** Appends the bits in a sequence of values. + * @tparam Iterator A forward iterator. + * @param first An iterator pointing to the first element of the sequence. 
+ * @param last An iterator pointing to one past the last element of the + * sequence. + */ + template + void append(ForwardIterator first, ForwardIterator last) + { + if (first == last) + return; + + block_type excess = extra_bits(); + typename std::iterator_traits::difference_type delta = + std::distance(first, last); + + bits_.reserve(blocks() + delta); + if (excess == 0) + { + bits_.back() |= (*first << excess); + do + { + block_type b = *first++ >> (bits_per_block - excess); + bits_.push_back(b | (first == last ? 0 : *first << excess)); + } while (first != last); + } + else + { + bits_.insert(bits_.end(), first, last); + } + num_bits_ += bits_per_block * delta; + } + + /** + * Appends the bits in a given block. + * @param block The block containing bits to append. + */ + void append(block_type block); + + /** Appends a single bit to the end of the bit vector. + * @param bit The value of the bit. + */ + void push_back(bool bit); + + /** + * Clears all bits in the bitvector. + */ + void clear(); + + /** + * Resizes the bit vector to a new number of bits. + * @param n The new number of bits of the bit vector. + * @param value The bit value of new values, if the vector expands. + */ + void resize(size_type n, bool value = false); + + /** + * Sets a bit at a specific position to a given value. + * @param i The bit position. + * @param bit The value assigned to position *i*. + * @return A reference to the bit vector instance. + */ + BitVector& set(size_type i, bool bit = true); + + /** + * Sets all bits to 1. + * @return A reference to the bit vector instance. + */ + BitVector& set(); + + /** + * Resets a bit at a specific position, i.e., sets it to 0. + * @param i The bit position. + * @return A reference to the bit vector instance. + */ + BitVector& reset(size_type i); + + /** + * Sets all bits to 0. + * @return A reference to the bit vector instance. + */ + BitVector& reset(); + + /** + * Toggles/flips a bit at a specific position. + * @param i The bit position. + * @return A reference to the bit vector instance. + */ + BitVector& flip(size_type i); + + /** + * Computes the complement. + * @return A reference to the bit vector instance. + */ + BitVector& flip(); + + /** Retrieves a single bit. + * @param i The bit position. + * @return A mutable reference to the bit at position *i*. + */ + Reference operator[](size_type i); + + /** + * Retrieves a single bit. + * @param i The bit position. + * @return A const-reference to the bit at position *i*. + */ + const_reference operator[](size_type i) const; + + /** + * Counts the number of 1-bits in the bit vector. Also known as *population + * count* or *Hamming weight*. + * @return The number of bits set to 1. + */ + size_type count() const; + + /** + * Retrieves the number of blocks of the underlying storage. + * @param The number of blocks that represent `size()` bits. + */ + size_type blocks() const; + + /** + * Retrieves the number of bits the bitvector consist of. + * @return The length of the bit vector in bits. + */ + size_type size() const; + + /** + * Checks whether the bit vector is empty. + * @return `true` iff the bitvector has zero length. + */ + bool empty() const; + + /** + * Finds the bit position of of the first 1-bit. + * @return The position of the first bit that equals to one or `npos` if no + * such bit exists. + */ + size_type find_first() const; + + /** + * Finds the next 1-bit from a given starting position. + * + * @param i The index where to start looking. 
+ * + * @return The position of the first bit that equals to 1 after position + * *i* or `npos` if no such bit exists. + */ + size_type find_next(size_type i) const; + +private: + /** + * Computes the block index for a given bit position. + */ + static size_type block_index(size_type i) + { + return i / bits_per_block; + } + + /** + * Computes the bit index within a given block for a given bit position. + */ + static block_type bit_index(size_type i) + { + return i % bits_per_block; + } + + /** + * Computes the bitmask block to extract a bit a given bit position. + */ + static block_type bit_mask(size_type i) + { + return block_type(1) << bit_index(i); + } + + /** + * Computes the number of blocks needed to represent a given number of + * bits. + * @param bits the number of bits. + * @return The number of blocks to represent *bits* number of bits. + */ + static size_type bits_to_blocks(size_type bits) + { + return bits / bits_per_block + + static_cast(bits % bits_per_block != 0); + } + + /** + * Computes the bit position first 1-bit in a given block. + * @param block The block to inspect. + * @return The bit position where *block* has its first bit set to 1. + */ + static size_type lowest_bit(block_type block); + + /** + * Computes the number of excess/unused bits in the bit vector. + */ + block_type extra_bits() const; + + /** + * If the number of bits in the vector are not not a multiple of + * bitvector::bits_per_block, then the last block exhibits unused bits which + * this function resets. + */ + void zero_unused_bits(); + + /** + * Looks for the first 1-bit starting at a given position. + * @param i The block index to start looking. + * @return The block index of the first 1-bit starting from *i* or + * `bitvector::npos` if no 1-bit exists. + */ + size_type find_from(size_type i) const; + + std::vector bits_; + size_type num_bits_; +}; + +#endif diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 447b7d9ec7..33aaab29c1 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -303,6 +303,7 @@ set(bro_SRCS Base64.cc BitTorrent.cc BitTorrentTracker.cc + BitVector.cc BPF_Program.cc BroDoc.cc BroDocObj.cc From 9e32eaad6db992e60a3d669c4d8c7b5016cc8cbc Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Tue, 28 May 2013 20:58:01 -0700 Subject: [PATCH 005/118] Make bitvectors serializable. 
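
(Reading aid, not part of the original commit message.) The layout written
by BitVector::DoSerialize in the diff below is simply

    uint64   number of blocks
    uint64   block[0] .. block[n-1], each block widened to 64 bits
    uint64   number of bits

and BitVector::DoUnserialize reads the same sequence back to rebuild the
vector.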
--- src/BitVector.cc | 57 +++++++++++++++++++++++++++++++++++++++++++++-- src/BitVector.h | 13 ++++++++--- src/SerialTypes.h | 2 ++ 3 files changed, 67 insertions(+), 5 deletions(-) diff --git a/src/BitVector.cc b/src/BitVector.cc index 2f714a6c79..f57301d506 100644 --- a/src/BitVector.cc +++ b/src/BitVector.cc @@ -2,6 +2,7 @@ #include #include +#include "Serializer.h" BitVector::size_type BitVector::npos = static_cast(-1); BitVector::block_type BitVector::bits_per_block = @@ -62,7 +63,7 @@ BitVector::Reference& BitVector::Reference::operator=(Reference const& other) BitVector::Reference& BitVector::Reference::operator|=(bool x) { - if (x) + if (x) block_ |= mask_; return *this; } @@ -73,7 +74,7 @@ BitVector::Reference& BitVector::Reference::operator&=(bool x) block_ &= ~mask_; return *this; } - + BitVector::Reference& BitVector::Reference::operator^=(bool x) { if (x) @@ -453,3 +454,55 @@ BitVector::size_type BitVector::find_from(size_type i) const return npos; return i * bits_per_block + lowest_bit(bits_[i]); } + +bool BitVector::Serialize(SerialInfo* info) const + { + return SerialObj::Serialize(info); + } + +BitVector* BitVector::Unserialize(UnserialInfo* info) + { + return reinterpret_cast( + SerialObj::Unserialize(info, SER_BITVECTOR)); + } + +IMPLEMENT_SERIAL(BitVector, SER_BITVECTOR); + +bool BitVector::DoSerialize(SerialInfo* info) const + { + DO_SERIALIZE(SER_BITVECTOR, SerialObj); + + if ( ! SERIALIZE(static_cast(bits_.size())) ) + return false; + + for (size_t i = 0; i < bits_.size(); ++i) + if ( ! SERIALIZE(static_cast(bits_[i])) ) + return false; + + return SERIALIZE(static_cast(num_bits_)); + } + +bool BitVector::DoUnserialize(UnserialInfo* info) + { + DO_UNSERIALIZE(SerialObj); + + uint64 size; + if ( ! UNSERIALIZE(&size) ) + return false; + + bits_.resize(static_cast(size)); + uint64 block; + for ( size_t i = 0; i < bits_.size(); ++i ) + { + if ( ! UNSERIALIZE(&block) ) + return false; + bits_[i] = static_cast(block); + } + + uint64 num_bits; + if ( ! UNSERIALIZE(&num_bits) ) + return false; + num_bits_ = static_cast(num_bits); + + return true; + } diff --git a/src/BitVector.h b/src/BitVector.h index 46d7e2df8f..9900dd103e 100644 --- a/src/BitVector.h +++ b/src/BitVector.h @@ -3,11 +3,12 @@ #include #include +#include "SerialObj.h" /** * A vector of bits. */ -class BitVector { +class BitVector : SerialObj { public: typedef size_t block_type; typedef size_t size_type; @@ -42,7 +43,7 @@ public: typedef bool const_reference; /** - * Constructs an empty bit vector. + * Default-constructs an empty bit vector. */ BitVector(); @@ -253,6 +254,12 @@ public: */ size_type find_next(size_type i) const; + bool Serialize(SerialInfo* info) const; + static BitVector* Unserialize(UnserialInfo* info); + +protected: + DECLARE_SERIAL(BitVector); + private: /** * Computes the block index for a given bit position. @@ -286,7 +293,7 @@ private: */ static size_type bits_to_blocks(size_type bits) { - return bits / bits_per_block + return bits / bits_per_block + static_cast(bits % bits_per_block != 0); } diff --git a/src/SerialTypes.h b/src/SerialTypes.h index 723badab1e..c9c0c34a33 100644 --- a/src/SerialTypes.h +++ b/src/SerialTypes.h @@ -49,6 +49,7 @@ SERIAL_IS(STATE_ACCESS, 0x1100) SERIAL_IS_BO(CASE, 0x1200) SERIAL_IS(LOCATION, 0x1300) SERIAL_IS(RE_MATCHER, 0x1400) +SERIAL_IS(BITVECTOR, 0x1500) // These are the externally visible types. 
const SerialType SER_NONE = 0; @@ -202,5 +203,6 @@ SERIAL_CONST2(STATE_ACCESS) SERIAL_CONST2(CASE) SERIAL_CONST2(LOCATION) SERIAL_CONST2(RE_MATCHER) +SERIAL_CONST2(BITVECTOR) #endif From d873db03cef3bb09d45e789d69607487e36b6093 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Fri, 31 May 2013 18:31:14 -0700 Subject: [PATCH 006/118] Add draft of Bloom filter type hierarchy. --- src/BloomFilter.h | 266 +++++++++++++++++++++++++++++++++++++++++++++ src/CMakeLists.txt | 1 + 2 files changed, 267 insertions(+) create mode 100644 src/BloomFilter.h diff --git a/src/BloomFilter.h b/src/BloomFilter.h new file mode 100644 index 0000000000..a767c6b8b8 --- /dev/null +++ b/src/BloomFilter.h @@ -0,0 +1,266 @@ +#ifndef BloomFilter_h +#define BloomFilter_h + +#include +#include "BitVector.h" +#include "Hash.h" +#include "H3.h" + +/** + * A vector of counters, each of which have a fixed number of bits. + */ +class CounterVector : SerialObj { +public: + /** + * Constructs a counter vector having cells of a given width. + * + * @param width The number of bits that each cell occupies. + */ + explicit CounterVector(unsigned width); + + /** + * Increments a given cell. + * + * @param cell The cell to increment. + * + * @param value The value to add to the current counter in *cell*. + * + * @return `true` if adding *value* to the counter in *cell* succeeded. + */ + bool Increment(size_type cell, count_type value); + + /** + * Decrements a given cell. + * + * @param cell The cell to decrement. + * + * @param value The value to subtract from the current counter in *cell*. + * + * @return `true` if subtracting *value* from the counter in *cell* succeeded. + */ + bool Decrement(size_type cell, count_type value); + + /** + * Retrieves the counter of a given cell. + * + * @param cell The cell index to retrieve the count for. + * + * @return The counter associated with *cell*. + */ + count_type Count(size_type cell) const; + + /** + * Retrieves the number of cells in the storage. + * + * @return The number of cells. + */ + size_type Size() const; + + bool Serialize(SerialInfo* info) const; + static CounterVector* Unserialize(UnserialInfo* info); + +protected: + DECLARE_SERIAL(CounterVector); + + CounterVector(); + +private: + BitVector bits_; + unsigned width_; +}; + +/** + * The abstract base class for hash policies. + * @tparam Codomain An integral type. + */ +class HashPolicy { +public: + typedef hash_t hash_type; + virtual ~HashPolicy() { } + size_t k() const { return k; } + virtual std::vector Hash(const void* x, size_t n) const = 0; +protected: + /** + * A functor that computes a universal hash function. + * @tparam Codomain An integral type. + */ + template + class Hasher { + public: + template + Codomain operator()(const Domain& x) const + { + return h3_(&x, sizeof(x)); + } + Codomain operator()(const void* x, size_t n) const + { + return h3_(x, n); + } + private: + // FIXME: The hardcoded value of 36 comes from UHASH_KEY_SIZE defined in + // Hash.h. I do not know how this value impacts the hash function behavior + // so I'll just copy it verbatim. (Matthias) + H3 h3_; + }; + + HashPolicy(size_t k) : k_(k) { } +private: + size_t k_; +}; + +/** + * The *default* hashing policy. Performs *k* hash function computations. 
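+ *
+ * (Note added for clarity.) This policy keeps *k* independently seeded
+ * H3 hashers and evaluates every one of them on the input, i.e. it
+ * returns (h_0(x), ..., h_{k-1}(x)). The DoubleHashing policy further
+ * below instead derives all *k* values from just two hash functions as
+ * h_i(x) = h1(x) + i * h2(x).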
+ */ +class DefaultHashing : public HashPolicy { +public: + DefaultHashing(size_t k) : HashPolicy(k), hashers_(k) { } + virtual ~DoubleHashing() { } + + virtual std::vector Hash(const void* x, size_t n) const + { + std::vector h(k(), 0); + for (size_t i = 0; i < h.size(); ++i) + h[i] = hashers_[i](x, n); + return h; + } + +private: + std::vector< Hasher > hashers_; +}; + +/** + * The *double-hashing* policy. Uses a linear combination of 2 hash functions. + */ +class DoubleHashing : public HashPolicy { +public: + DoubleHashing(size_t k) : HashPolicy(k), hashers_(k) { } + virtual ~DoubleHashing() { } + + virtual std::vector Hash(const void* x, size_t n) const + { + Codomain h1 = hasher1_(x); + Codomain h2 = hasher2_(x); + std::vector h(k(), 0); + for (size_t i = 0; i < h.size(); ++i) + h[i] = h1 + i * h2; + return h; + } + +private: + Hasher hasher1_; + Hasher hasher2_; +}; + +/** + * The abstract base class for Bloom filters. + */ +class BloomFilter : SerialObj { +public: + virtual ~BloomFilter() { delete hash_; } + + /** + * Adds an element of type T to the Bloom filter. + * @param x The element to add + */ + template + void Add(const T& x) + { + ++elements_; + AddImpl(hash_->Hash(x)); + } + + /** + * Retrieves the associated count of a given value. + * + * @param x The value of type `T` to check. + * + * @return The counter associated with *x*. + */ + template + size_t Count(const T& x) const + { + return CountImpl(hash_->Hash(x)); + } + + /** + * Retrieves the number of elements added to the Bloom filter. + * + * @return The number of elements in this Bloom filter. + */ + size_t Size() const + { + return elements_; + } + +protected: + typedef std::vector HashVector; + + /** + * Default-constructs a Bloom filter. + */ + BloomFilter(); + + /** + * Constructs a BloomFilter. + * @param hash The hashing policy. + */ + BloomFilter(HashPolicy* hash); + + virtual void AddImpl(const HashVector& hashes) = 0; + + virtual size_t CountImpl(const HashVector& hashes) const = 0; + + std::vector Hash(const T& x) const + { + return hash_->Hash(&x, sizeof(x)); + } + +private: + HashPolicy* hash_; // Owned by *this. + + size_t elements_; +}; + +/** + * A basic Bloom filter. + */ +class BasicBloomFilter : public BloomFilter { +public: + BasicBloomFilter(); + BasicBloomFilter(HashPolicy* hash); + +protected: + virtual void AddImpl(const HashVector& h) + { + for ( size_t i = 0; i < h.size(); ++i ) + bits_.set(h[i] % h.size()); + } + + virtual size_t CountImpl(const HashVector& h) const + { + for ( size_t i = 0; i < h.size(); ++i ) + if ( ! bits_[h[i] % h.size()] ) + return 0; + return 1; + } + +private: + BitVector bits_; +}; + +/** + * A counting Bloom filter. + */ +class CountingBloomFilter : public BloomFilter { +public: + CountingBloomFilter(unsigned width); + CountingBloomFilter(HashPolicy* hash); + +protected: + CountingBloomFilter(); + +private: + CounterVector cells_; +}; + +#endif diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 33aaab29c1..11de7772d7 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -304,6 +304,7 @@ set(bro_SRCS BitTorrent.cc BitTorrentTracker.cc BitVector.cc + BloomFilter.cc BPF_Program.cc BroDoc.cc BroDocObj.cc From 190f98f8a901728d507452b09717692c4c227821 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Mon, 3 Jun 2013 10:51:53 -0400 Subject: [PATCH 007/118] Beginning some rework. 
--- scripts/base/files/hash/__load__.bro | 1 + scripts/base/files/hash/main.bro | 22 ++++++++++ .../base/frameworks/file-analysis/main.bro | 40 +++++++++---------- scripts/base/init-default.bro | 2 + 4 files changed, 44 insertions(+), 21 deletions(-) create mode 100644 scripts/base/files/hash/__load__.bro create mode 100644 scripts/base/files/hash/main.bro diff --git a/scripts/base/files/hash/__load__.bro b/scripts/base/files/hash/__load__.bro new file mode 100644 index 0000000000..d551be57d3 --- /dev/null +++ b/scripts/base/files/hash/__load__.bro @@ -0,0 +1 @@ +@load ./main \ No newline at end of file diff --git a/scripts/base/files/hash/main.bro b/scripts/base/files/hash/main.bro new file mode 100644 index 0000000000..cd50d6b291 --- /dev/null +++ b/scripts/base/files/hash/main.bro @@ -0,0 +1,22 @@ + +module FilesHash; + +export { + +} + +event file_hash(f: fa_file, kind: string, hash: string) &priority=5 + { + set_info(f); + switch ( kind ) { + case "md5": + f$info$md5 = hash; + break; + case "sha1": + f$info$sha1 = hash; + break; + case "sha256": + f$info$sha256 = hash; + break; + } + } diff --git a/scripts/base/frameworks/file-analysis/main.bro b/scripts/base/frameworks/file-analysis/main.bro index 142709dcc4..418da53f70 100644 --- a/scripts/base/frameworks/file-analysis/main.bro +++ b/scripts/base/frameworks/file-analysis/main.bro @@ -34,6 +34,9 @@ export { ## For the most part, fields here are derived from ones of the same name ## in :bro:see:`fa_file`. type Info: record { + ## The time when the file was first seen. + ts: time &log; + ## An identifier associated with a single file. id: string &log; @@ -233,25 +236,35 @@ function set_info(f: fa_file) { if ( ! f?$info ) { - local tmp: Info; + local tmp: Info = Info($ts=network_time()); f$info = tmp; } + f$info$ts = network_time(); f$info$id = f$id; - if ( f?$parent_id ) f$info$parent_id = f$parent_id; - if ( f?$source ) f$info$source = f$source; - if ( f?$is_orig ) f$info$is_orig = f$is_orig; + if ( f?$parent_id ) + f$info$parent_id = f$parent_id; + if ( f?$source ) + f$info$source = f$source; + if ( f?$is_orig ) + f$info$is_orig = f$is_orig; f$info$last_active = f$last_active; f$info$seen_bytes = f$seen_bytes; - if ( f?$total_bytes ) f$info$total_bytes = f$total_bytes; + if ( f?$total_bytes ) + f$info$total_bytes = f$total_bytes; f$info$missing_bytes = f$missing_bytes; f$info$overflow_bytes = f$overflow_bytes; f$info$timeout_interval = f$timeout_interval; f$info$bof_buffer_size = f$bof_buffer_size; - if ( f?$mime_type ) f$info$mime_type = f$mime_type; + if ( f?$mime_type ) + f$info$mime_type = f$mime_type; if ( f?$conns ) + { for ( cid in f$conns ) + { add f$info$conn_uids[f$conns[cid]$uid]; + } + } } function set_timeout_interval(f: fa_file, t: interval): bool @@ -324,21 +337,6 @@ event file_timeout(f: fa_file) &priority=5 f$info$timedout = T; } -event file_hash(f: fa_file, kind: string, hash: string) &priority=5 - { - set_info(f); - switch ( kind ) { - case "md5": - f$info$md5 = hash; - break; - case "sha1": - f$info$sha1 = hash; - break; - case "sha256": - f$info$sha256 = hash; - break; - } - } event file_state_remove(f: fa_file) &priority=5 { diff --git a/scripts/base/init-default.bro b/scripts/base/init-default.bro index 829a1b9982..03ba474e0b 100644 --- a/scripts/base/init-default.bro +++ b/scripts/base/init-default.bro @@ -46,4 +46,6 @@ @load base/protocols/ssl @load base/protocols/syslog +@load base/files/hash + @load base/misc/find-checksum-offloading From f529df33e0afa930e4babff66f4a5f590b5eb6d9 Mon Sep 17 00:00:00 2001 
From: Matthias Vallentin Date: Mon, 3 Jun 2013 14:00:28 -0700 Subject: [PATCH 008/118] Stabilize Bloom filter interface. --- src/BloomFilter.cc | 33 ++++++++++++++++++ src/BloomFilter.h | 85 +++++++++++++++++----------------------------- 2 files changed, 65 insertions(+), 53 deletions(-) create mode 100644 src/BloomFilter.cc diff --git a/src/BloomFilter.cc b/src/BloomFilter.cc new file mode 100644 index 0000000000..6873815f69 --- /dev/null +++ b/src/BloomFilter.cc @@ -0,0 +1,33 @@ +#include "BloomFilter.h" + +HashPolicy::HashVector DefaultHashing::Hash(const void* x, size_t n) const + { + HashVector h(k(), 0); + for ( size_t i = 0; i < h.size(); ++i ) + h[i] = hashers_[i](x, n); + return h; + } + +HashPolicy::HashVector DoubleHashing::Hash(const void* x, size_t n) const + { + HashType h1 = hasher1_(x); + HashType h2 = hasher2_(x); + HashVector h(k(), 0); + for ( size_t i = 0; i < h.size(); ++i ) + h[i] = h1 + i * h2; + return h; + } + +void BasicBloomFilter::AddImpl(const HashPolicy::HashVector& h) + { + for ( size_t i = 0; i < h.size(); ++i ) + bits_.set(h[i] % h.size()); + } + +size_t BasicBloomFilter::CountImpl(const HashPolicy::HashVector& h) const + { + for ( size_t i = 0; i < h.size(); ++i ) + if ( ! bits_[h[i] % h.size()] ) + return 0; + return 1; + } diff --git a/src/BloomFilter.h b/src/BloomFilter.h index a767c6b8b8..dca4eff2bd 100644 --- a/src/BloomFilter.h +++ b/src/BloomFilter.h @@ -11,6 +11,9 @@ */ class CounterVector : SerialObj { public: + typedef size_t size_type; + typedef uint64 count_type; + /** * Constructs a counter vector having cells of a given width. * @@ -70,21 +73,24 @@ private: }; /** - * The abstract base class for hash policies. + * The abstract base class for hash policies that hash elements *k* times. * @tparam Codomain An integral type. */ class HashPolicy { public: - typedef hash_t hash_type; + typedef hash_t HashType; + typedef std::vector HashVector; + virtual ~HashPolicy() { } - size_t k() const { return k; } - virtual std::vector Hash(const void* x, size_t n) const = 0; + size_t k() const { return k_; } + virtual HashVector Hash(const void* x, size_t n) const = 0; + protected: /** * A functor that computes a universal hash function. * @tparam Codomain An integral type. 
*/ - template + template class Hasher { public: template @@ -104,8 +110,9 @@ protected: }; HashPolicy(size_t k) : k_(k) { } + private: - size_t k_; + const size_t k_; }; /** @@ -114,18 +121,12 @@ private: class DefaultHashing : public HashPolicy { public: DefaultHashing(size_t k) : HashPolicy(k), hashers_(k) { } - virtual ~DoubleHashing() { } + virtual ~DefaultHashing() { } - virtual std::vector Hash(const void* x, size_t n) const - { - std::vector h(k(), 0); - for (size_t i = 0; i < h.size(); ++i) - h[i] = hashers_[i](x, n); - return h; - } + virtual HashVector Hash(const void* x, size_t n) const; private: - std::vector< Hasher > hashers_; + std::vector< Hasher > hashers_; }; /** @@ -133,22 +134,14 @@ private: */ class DoubleHashing : public HashPolicy { public: - DoubleHashing(size_t k) : HashPolicy(k), hashers_(k) { } + DoubleHashing(size_t k) : HashPolicy(k) { } virtual ~DoubleHashing() { } - virtual std::vector Hash(const void* x, size_t n) const - { - Codomain h1 = hasher1_(x); - Codomain h2 = hasher2_(x); - std::vector h(k(), 0); - for (size_t i = 0; i < h.size(); ++i) - h[i] = h1 + i * h2; - return h; - } + virtual HashVector Hash(const void* x, size_t n) const; private: - Hasher hasher1_; - Hasher hasher2_; + Hasher hasher1_; + Hasher hasher2_; }; /** @@ -166,7 +159,7 @@ public: void Add(const T& x) { ++elements_; - AddImpl(hash_->Hash(x)); + AddImpl(hash_->Hash(&x, sizeof(x))); } /** @@ -179,7 +172,7 @@ public: template size_t Count(const T& x) const { - return CountImpl(hash_->Hash(x)); + return CountImpl(hash_->Hash(&x, sizeof(x))); } /** @@ -193,8 +186,6 @@ public: } protected: - typedef std::vector HashVector; - /** * Default-constructs a Bloom filter. */ @@ -206,17 +197,12 @@ protected: */ BloomFilter(HashPolicy* hash); - virtual void AddImpl(const HashVector& hashes) = 0; + virtual void AddImpl(const HashPolicy::HashVector& hashes) = 0; - virtual size_t CountImpl(const HashVector& hashes) const = 0; - - std::vector Hash(const T& x) const - { - return hash_->Hash(&x, sizeof(x)); - } + virtual size_t CountImpl(const HashPolicy::HashVector& hashes) const = 0; private: - HashPolicy* hash_; // Owned by *this. + HashPolicy* hash_; // Owned by *this. size_t elements_; }; @@ -230,19 +216,9 @@ public: BasicBloomFilter(HashPolicy* hash); protected: - virtual void AddImpl(const HashVector& h) - { - for ( size_t i = 0; i < h.size(); ++i ) - bits_.set(h[i] % h.size()); - } + virtual void AddImpl(const HashPolicy::HashVector& h); - virtual size_t CountImpl(const HashVector& h) const - { - for ( size_t i = 0; i < h.size(); ++i ) - if ( ! bits_[h[i] % h.size()] ) - return 0; - return 1; - } + virtual size_t CountImpl(const HashPolicy::HashVector& h) const; private: BitVector bits_; @@ -253,12 +229,15 @@ private: */ class CountingBloomFilter : public BloomFilter { public: - CountingBloomFilter(unsigned width); - CountingBloomFilter(HashPolicy* hash); + CountingBloomFilter(unsigned width, HashPolicy* hash); protected: CountingBloomFilter(); + virtual void AddImpl(const HashPolicy::HashVector& h); + + virtual size_t CountImpl(const HashPolicy::HashVector& h) const; + private: CounterVector cells_; }; From f708cd4a361ba02083380cfe0db2949e3e06cff7 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Mon, 3 Jun 2013 22:55:21 -0700 Subject: [PATCH 009/118] Work on parameter estimation and serialization. 
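
(Worked note added for illustration, not part of the original commit
message.) The new BasicBloomFilter::Cells() and BasicBloomFilter::K() in
the diff below implement the standard Bloom filter sizing formulas: for a
target false-positive probability p and an expected capacity of n elements,

    cells  m = ceil( -n * ln(p) / (ln 2)^2 )
    hashes k = round( (m / n) * ln 2 )

For example, p = 0.01 and n = 1000 gives m = ceil(9585.06) = 9586 cells and
k = round(6.64) = 7 hash functions.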
--- src/BloomFilter.cc | 131 ++++++++++++++++++++++++++++++++++++++++++++- src/BloomFilter.h | 41 +++++++------- src/NetVar.cc | 2 + src/OpaqueVal.cc | 23 ++++++++ src/OpaqueVal.h | 16 ++++++ src/SerialTypes.h | 7 +++ 6 files changed, 198 insertions(+), 22 deletions(-) diff --git a/src/BloomFilter.cc b/src/BloomFilter.cc index 6873815f69..4787bef0f0 100644 --- a/src/BloomFilter.cc +++ b/src/BloomFilter.cc @@ -1,23 +1,130 @@ #include "BloomFilter.h" +#include +#include "Serializer.h" + +// Backport C++11's std::round(). +namespace { +template +T round(double x) { return (x > 0.0) ? (x + 0.5) : (x - 0.5); } +} // namespace + + +IMPLEMENT_SERIAL(CounterVector, SER_COUNTERVECTOR) + +bool CounterVector::DoSerialize(SerialInfo* info) const + { + DO_SERIALIZE(SER_COUNTERVECTOR, SerialObj); + if ( ! SERIALIZE(&bits_) ) + return false; + return SERIALIZE(static_cast(width_)); + } + +bool CounterVector::DoUnserialize(UnserialInfo* info) + { + DO_UNSERIALIZE(SerialObj); + return false; + // TODO: Ask Robin how to unserialize non-pointer members. + //if ( ! UNSERIALIZE(&bits_) ) + // return false; + uint64 width; + if ( ! UNSERIALIZE(&width) ) + return false; + width_ = static_cast(width); + return true; + } + + HashPolicy::HashVector DefaultHashing::Hash(const void* x, size_t n) const { - HashVector h(k(), 0); + HashVector h(K(), 0); for ( size_t i = 0; i < h.size(); ++i ) h[i] = hashers_[i](x, n); return h; } + HashPolicy::HashVector DoubleHashing::Hash(const void* x, size_t n) const { HashType h1 = hasher1_(x); HashType h2 = hasher2_(x); - HashVector h(k(), 0); + HashVector h(K(), 0); for ( size_t i = 0; i < h.size(); ++i ) h[i] = h1 + i * h2; return h; } +bool BloomFilter::Serialize(SerialInfo* info) const + { + return SerialObj::Serialize(info); + } + +BloomFilter* BloomFilter::Unserialize(UnserialInfo* info) + { + return reinterpret_cast( + SerialObj::Unserialize(info, SER_BLOOMFILTER)); + } + +// FIXME: should abstract base classes also have IMPLEMENT_SERIAL? +//IMPLEMENT_SERIAL(BloomFilter, SER_BLOOMFILTER) + +bool BloomFilter::DoSerialize(SerialInfo* info) const + { + DO_SERIALIZE(SER_BLOOMFILTER, SerialObj); + // TODO: Make the hash policy serializable. + //if ( ! SERIALIZE(hash_) ) + // return false; + return SERIALIZE(static_cast(elements_)); + } + +bool BloomFilter::DoUnserialize(UnserialInfo* info) + { + DO_UNSERIALIZE(SerialObj); + // TODO: Make the hash policy serializable. + //if ( ! hash_ = HashPolicy::Unserialize(info) ) + // return false; + uint64 elements; + if ( UNSERIALIZE(&elements) ) + return false; + elements_ = static_cast(elements); + return true; + } + +size_t BasicBloomFilter::Cells(double fp, size_t capacity) + { + double ln2 = std::log(2); + return std::ceil(-(capacity * std::log(fp) / ln2 / ln2)); + } + +size_t BasicBloomFilter::K(size_t cells, size_t capacity) + { + double frac = static_cast(cells) / static_cast(capacity); + return round(frac * std::log(2)); + } + +BasicBloomFilter::BasicBloomFilter(size_t cells, HashPolicy* hash) + : BloomFilter(hash), bits_(cells) + { + } + +IMPLEMENT_SERIAL(BasicBloomFilter, SER_BASICBLOOMFILTER) + +bool BasicBloomFilter::DoSerialize(SerialInfo* info) const + { + DO_SERIALIZE(SER_BASICBLOOMFILTER, BloomFilter); + // TODO: Make the hash policy serializable. + //if ( ! SERIALIZE(&bits_) ) + // return false; + return true; + } + +bool BasicBloomFilter::DoUnserialize(UnserialInfo* info) + { + DO_UNSERIALIZE(BloomFilter); + // TODO: Non-pointer member deserialization? 
+ return true; + } + void BasicBloomFilter::AddImpl(const HashPolicy::HashVector& h) { for ( size_t i = 0; i < h.size(); ++i ) @@ -31,3 +138,23 @@ size_t BasicBloomFilter::CountImpl(const HashPolicy::HashVector& h) const return 0; return 1; } + + +void CountingBloomFilter::AddImpl(const HashPolicy::HashVector& h) + { + for ( size_t i = 0; i < h.size(); ++i ) + cells_.Increment(h[i] % h.size(), 1); + } + +size_t CountingBloomFilter::CountImpl(const HashPolicy::HashVector& h) const + { + CounterVector::size_type min = + std::numeric_limits::max(); + for ( size_t i = 0; i < h.size(); ++i ) + { + CounterVector::size_type cnt = cells_.Count(h[i] % h.size()); + if ( cnt < min ) + min = cnt; + } + return min; + } diff --git a/src/BloomFilter.h b/src/BloomFilter.h index dca4eff2bd..82948f30ec 100644 --- a/src/BloomFilter.h +++ b/src/BloomFilter.h @@ -65,7 +65,7 @@ public: protected: DECLARE_SERIAL(CounterVector); - CounterVector(); + CounterVector() { } private: BitVector bits_; @@ -82,7 +82,7 @@ public: typedef std::vector HashVector; virtual ~HashPolicy() { } - size_t k() const { return k_; } + size_t K() const { return k_; } virtual HashVector Hash(const void* x, size_t n) const = 0; protected: @@ -130,7 +130,7 @@ private: }; /** - * The *double-hashing* policy. Uses a linear combination of 2 hash functions. + * The *double-hashing* policy. Uses a linear combination of two hash functions. */ class DoubleHashing : public HashPolicy { public: @@ -185,25 +185,20 @@ public: return elements_; } -protected: - /** - * Default-constructs a Bloom filter. - */ - BloomFilter(); + bool Serialize(SerialInfo* info) const; + static BloomFilter* Unserialize(UnserialInfo* info); - /** - * Constructs a BloomFilter. - * @param hash The hashing policy. - */ - BloomFilter(HashPolicy* hash); +protected: + DECLARE_SERIAL(BloomFilter); + + BloomFilter() { }; + BloomFilter(HashPolicy* hash) : hash_(hash) { } virtual void AddImpl(const HashPolicy::HashVector& hashes) = 0; - virtual size_t CountImpl(const HashPolicy::HashVector& hashes) const = 0; private: - HashPolicy* hash_; // Owned by *this. 
- + HashPolicy* hash_; size_t elements_; }; @@ -212,12 +207,17 @@ private: */ class BasicBloomFilter : public BloomFilter { public: - BasicBloomFilter(); - BasicBloomFilter(HashPolicy* hash); + static size_t Cells(double fp, size_t capacity); + static size_t K(size_t cells, size_t capacity); + + BasicBloomFilter(size_t cells, HashPolicy* hash); protected: - virtual void AddImpl(const HashPolicy::HashVector& h); + DECLARE_SERIAL(BasicBloomFilter); + BasicBloomFilter() { } + + virtual void AddImpl(const HashPolicy::HashVector& h); virtual size_t CountImpl(const HashPolicy::HashVector& h) const; private: @@ -232,10 +232,11 @@ public: CountingBloomFilter(unsigned width, HashPolicy* hash); protected: + DECLARE_SERIAL(CountingBloomFilter); + CountingBloomFilter(); virtual void AddImpl(const HashPolicy::HashVector& h); - virtual size_t CountImpl(const HashPolicy::HashVector& h) const; private: diff --git a/src/NetVar.cc b/src/NetVar.cc index 3a23e4c9fa..d8c2192af7 100644 --- a/src/NetVar.cc +++ b/src/NetVar.cc @@ -244,6 +244,7 @@ OpaqueType* md5_type; OpaqueType* sha1_type; OpaqueType* sha256_type; OpaqueType* entropy_type; +OpaqueType* bloomfilter_type; #include "const.bif.netvar_def" #include "types.bif.netvar_def" @@ -310,6 +311,7 @@ void init_general_global_var() sha1_type = new OpaqueType("sha1"); sha256_type = new OpaqueType("sha256"); entropy_type = new OpaqueType("entropy"); + bloomfilter_type = new OpaqueType("bloomfilter"); } void init_net_var() diff --git a/src/OpaqueVal.cc b/src/OpaqueVal.cc index 19346e52f2..a5fb65f53b 100644 --- a/src/OpaqueVal.cc +++ b/src/OpaqueVal.cc @@ -1,4 +1,6 @@ #include "OpaqueVal.h" + +#include "BloomFilter.h" #include "NetVar.h" #include "Reporter.h" #include "Serializer.h" @@ -515,3 +517,24 @@ bool EntropyVal::DoUnserialize(UnserialInfo* info) return true; } + +BloomFilterVal::BloomFilterVal(OpaqueType* t) : OpaqueVal(t) + { + } + +IMPLEMENT_SERIAL(BloomFilterVal, SER_BLOOMFILTER_VAL); + +bool BloomFilterVal::DoSerialize(SerialInfo* info) const + { + DO_SERIALIZE(SER_BLOOMFILTER_VAL, OpaqueVal); + // TODO: implement. + return true; + } + +bool BloomFilterVal::DoUnserialize(UnserialInfo* info) + { + DO_UNSERIALIZE(OpaqueVal); + // TODO: implement. + return true; + } + diff --git a/src/OpaqueVal.h b/src/OpaqueVal.h index 78fa5da5e9..1c9c0361cc 100644 --- a/src/OpaqueVal.h +++ b/src/OpaqueVal.h @@ -7,6 +7,8 @@ #include "Val.h" #include "digest.h" +class BloomFilter; + class HashVal : public OpaqueVal { public: virtual bool IsValid() const; @@ -107,4 +109,18 @@ private: RandTest state; }; +class BloomFilterVal : public OpaqueVal { +public: + BloomFilterVal(); + +protected: + friend class Val; + BloomFilterVal(OpaqueType* t); + + DECLARE_SERIAL(BloomFilterVal); + +private: + BloomFilter* bloom_filter_; +}; + #endif diff --git a/src/SerialTypes.h b/src/SerialTypes.h index c9c0c34a33..171113ab6a 100644 --- a/src/SerialTypes.h +++ b/src/SerialTypes.h @@ -50,6 +50,9 @@ SERIAL_IS_BO(CASE, 0x1200) SERIAL_IS(LOCATION, 0x1300) SERIAL_IS(RE_MATCHER, 0x1400) SERIAL_IS(BITVECTOR, 0x1500) +SERIAL_IS(COUNTERVECTOR, 0xa000) +SERIAL_IS(BLOOMFILTER, 0xa100) +SERIAL_IS(BASICBLOOMFILTER, 0xa200) // These are the externally visible types. 
const SerialType SER_NONE = 0; @@ -105,6 +108,7 @@ SERIAL_VAL(MD5_VAL, 16) SERIAL_VAL(SHA1_VAL, 17) SERIAL_VAL(SHA256_VAL, 18) SERIAL_VAL(ENTROPY_VAL, 19) +SERIAL_VAL(BLOOMFILTER_VAL, 20) #define SERIAL_EXPR(name, val) SERIAL_CONST(name, val, EXPR) SERIAL_EXPR(EXPR, 1) @@ -204,5 +208,8 @@ SERIAL_CONST2(CASE) SERIAL_CONST2(LOCATION) SERIAL_CONST2(RE_MATCHER) SERIAL_CONST2(BITVECTOR) +SERIAL_CONST2(COUNTERVECTOR) +SERIAL_CONST2(BLOOMFILTER) +SERIAL_CONST2(BASICBLOOMFILTER) #endif From d3297dd6f3b6a50c07c90e9ad5f61c0ddf762460 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Tue, 4 Jun 2013 13:32:26 -0700 Subject: [PATCH 010/118] Adhere to Bro coding style. --- src/BitVector.cc | 100 +++++++++++++++++++++++------------------------ src/BitVector.h | 40 +++++++++---------- 2 files changed, 69 insertions(+), 71 deletions(-) diff --git a/src/BitVector.cc b/src/BitVector.cc index f57301d506..f029230609 100644 --- a/src/BitVector.cc +++ b/src/BitVector.cc @@ -33,7 +33,7 @@ BitVector::Reference::Reference(block_type& block, block_type i) assert(i < bits_per_block); } -BitVector::Reference& BitVector::Reference::flip() +BitVector::Reference& BitVector::Reference::Flip() { block_ ^= mask_; return *this; @@ -105,7 +105,7 @@ BitVector::BitVector(BitVector const& other) BitVector BitVector::operator~() const { BitVector b(*this); - b.flip(); + b.Flip(); return b; } @@ -130,15 +130,15 @@ BitVector BitVector::operator>>(size_type n) const BitVector& BitVector::operator<<=(size_type n) { if (n >= num_bits_) - return reset(); + return Reset(); if (n > 0) { - size_type last = blocks() - 1; + size_type last = Blocks() - 1; size_type div = n / bits_per_block; block_type r = bit_index(n); block_type* b = &bits_[0]; - assert(blocks() >= 1); + assert(Blocks() >= 1); assert(div <= last); if (r != 0) @@ -164,15 +164,15 @@ BitVector& BitVector::operator<<=(size_type n) BitVector& BitVector::operator>>=(size_type n) { if (n >= num_bits_) - return reset(); + return Reset(); if (n > 0) { - size_type last = blocks() - 1; + size_type last = Blocks() - 1; size_type div = n / bits_per_block; block_type r = bit_index(n); block_type* b = &bits_[0]; - assert(blocks() >= 1); + assert(Blocks() >= 1); assert(div <= last); if (r != 0) @@ -187,39 +187,39 @@ BitVector& BitVector::operator>>=(size_type n) b[i-div] = b[i]; } - std::fill_n(b + (blocks() - div), div, block_type(0)); + std::fill_n(b + (Blocks() - div), div, block_type(0)); } return *this; } BitVector& BitVector::operator&=(BitVector const& other) { - assert(size() >= other.size()); - for (size_type i = 0; i < blocks(); ++i) + assert(Size() >= other.Size()); + for (size_type i = 0; i < Blocks(); ++i) bits_[i] &= other.bits_[i]; return *this; } BitVector& BitVector::operator|=(BitVector const& other) { - assert(size() >= other.size()); - for (size_type i = 0; i < blocks(); ++i) + assert(Size() >= other.Size()); + for (size_type i = 0; i < Blocks(); ++i) bits_[i] |= other.bits_[i]; return *this; } BitVector& BitVector::operator^=(BitVector const& other) { - assert(size() >= other.size()); - for (size_type i = 0; i < blocks(); ++i) + assert(Size() >= other.Size()); + for (size_type i = 0; i < Blocks(); ++i) bits_[i] ^= other.bits_[i]; return *this; } BitVector& BitVector::operator-=(BitVector const& other) { - assert(size() >= other.size()); - for (size_type i = 0; i < blocks(); ++i) + assert(Size() >= other.Size()); + for (size_type i = 0; i < Blocks(); ++i) bits_[i] &= ~other.bits_[i]; return *this; } @@ -260,8 +260,8 @@ bool operator!=(BitVector const& x, BitVector 
const& y) bool operator<(BitVector const& x, BitVector const& y) { - assert(x.size() == y.size()); - for (BitVector::size_type r = x.blocks(); r > 0; --r) + assert(x.Size() == y.Size()); + for (BitVector::size_type r = x.Blocks(); r > 0; --r) { BitVector::size_type i = r - 1; if (x.bits_[i] < y.bits_[i]) @@ -272,9 +272,9 @@ bool operator<(BitVector const& x, BitVector const& y) return false; } -void BitVector::resize(size_type n, bool value) +void BitVector::Resize(size_type n, bool value) { - size_type old = blocks(); + size_type old = Blocks(); size_type required = bits_to_blocks(n); block_type block_value = value ? ~block_type(0) : block_type(0); @@ -288,27 +288,27 @@ void BitVector::resize(size_type n, bool value) zero_unused_bits(); } -void BitVector::clear() +void BitVector::Clear() { bits_.clear(); num_bits_ = 0; } -void BitVector::push_back(bool bit) +void BitVector::PushBack(bool bit) { - size_type s = size(); - resize(s + 1); - set(s, bit); + size_type s = Size(); + Resize(s + 1); + Set(s, bit); } -void BitVector::append(block_type block) +void BitVector::Append(block_type block) { size_type excess = extra_bits(); if (excess) { - assert(! bits_.empty()); + assert(! Empty()); bits_.push_back(block >> (bits_per_block - excess)); - bits_[bits_.size() - 2] |= (block << excess); + bits_[Blocks() - 2] |= (block << excess); } else { @@ -317,48 +317,46 @@ void BitVector::append(block_type block) num_bits_ += bits_per_block; } -BitVector& BitVector::set(size_type i, bool bit) +BitVector& BitVector::Set(size_type i, bool bit) { assert(i < num_bits_); - if (bit) - bits_[block_index(i)] |= bit_mask(i); + bits_[block_index(i)] |= bit_mask(i); else - reset(i); - + Reset(i); return *this; } -BitVector& BitVector::set() +BitVector& BitVector::Set() { std::fill(bits_.begin(), bits_.end(), ~block_type(0)); zero_unused_bits(); return *this; } -BitVector& BitVector::reset(size_type i) +BitVector& BitVector::Reset(size_type i) { assert(i < num_bits_); bits_[block_index(i)] &= ~bit_mask(i); return *this; } -BitVector& BitVector::reset() +BitVector& BitVector::Reset() { std::fill(bits_.begin(), bits_.end(), block_type(0)); return *this; } -BitVector& BitVector::flip(size_type i) +BitVector& BitVector::Flip(size_type i) { assert(i < num_bits_); bits_[block_index(i)] ^= bit_mask(i); return *this; } -BitVector& BitVector::flip() +BitVector& BitVector::Flip() { - for (size_type i = 0; i < blocks(); ++i) + for (size_type i = 0; i < Blocks(); ++i) bits_[i] = ~bits_[i]; zero_unused_bits(); return *this; @@ -376,11 +374,11 @@ BitVector::Reference BitVector::operator[](size_type i) return Reference(bits_[block_index(i)], bit_index(i)); } -BitVector::size_type BitVector::count() const +BitVector::size_type BitVector::Count() const { std::vector::const_iterator first = bits_.begin(); size_t n = 0; - size_type length = blocks(); + size_type length = Blocks(); while (length) { block_type block = *first; @@ -396,29 +394,29 @@ BitVector::size_type BitVector::count() const return n; } -BitVector::size_type BitVector::blocks() const +BitVector::size_type BitVector::Blocks() const { return bits_.size(); } -BitVector::size_type BitVector::size() const +BitVector::size_type BitVector::Size() const { return num_bits_; } -bool BitVector::empty() const +bool BitVector::Empty() const { return bits_.empty(); } -BitVector::size_type BitVector::find_first() const +BitVector::size_type BitVector::FindFirst() const { return find_from(0); } -BitVector::size_type BitVector::find_next(size_type i) const +BitVector::size_type 
BitVector::FindNext(size_type i) const { - if (i >= (size() - 1) || size() == 0) + if (i >= (Size() - 1) || Size() == 0) return npos; ++i; size_type bi = block_index(i); @@ -437,7 +435,7 @@ BitVector::size_type BitVector::lowest_bit(block_type block) BitVector::block_type BitVector::extra_bits() const { - return bit_index(size()); + return bit_index(Size()); } void BitVector::zero_unused_bits() @@ -448,9 +446,9 @@ void BitVector::zero_unused_bits() BitVector::size_type BitVector::find_from(size_type i) const { - while (i < blocks() && bits_[i] == 0) + while (i < Blocks() && bits_[i] == 0) ++i; - if (i >= blocks()) + if (i >= Blocks()) return npos; return i * bits_per_block + lowest_bit(bits_[i]); } diff --git a/src/BitVector.h b/src/BitVector.h index 9900dd103e..8315a151f0 100644 --- a/src/BitVector.h +++ b/src/BitVector.h @@ -24,7 +24,7 @@ public: Reference(block_type& block, block_type i); public: - Reference& flip(); + Reference& Flip(); operator bool() const; bool operator~() const; Reference& operator=(bool x); @@ -110,7 +110,7 @@ public: * sequence. */ template - void append(ForwardIterator first, ForwardIterator last) + void Append(ForwardIterator first, ForwardIterator last) { if (first == last) return; @@ -119,7 +119,7 @@ public: typename std::iterator_traits::difference_type delta = std::distance(first, last); - bits_.reserve(blocks() + delta); + bits_.reserve(Blocks() + delta); if (excess == 0) { bits_.back() |= (*first << excess); @@ -140,24 +140,24 @@ public: * Appends the bits in a given block. * @param block The block containing bits to append. */ - void append(block_type block); + void Append(block_type block); /** Appends a single bit to the end of the bit vector. * @param bit The value of the bit. */ - void push_back(bool bit); + void PushBack(bool bit); /** * Clears all bits in the bitvector. */ - void clear(); + void Clear(); /** * Resizes the bit vector to a new number of bits. * @param n The new number of bits of the bit vector. * @param value The bit value of new values, if the vector expands. */ - void resize(size_type n, bool value = false); + void Resize(size_type n, bool value = false); /** * Sets a bit at a specific position to a given value. @@ -165,39 +165,39 @@ public: * @param bit The value assigned to position *i*. * @return A reference to the bit vector instance. */ - BitVector& set(size_type i, bool bit = true); + BitVector& Set(size_type i, bool bit = true); /** * Sets all bits to 1. * @return A reference to the bit vector instance. */ - BitVector& set(); + BitVector& Set(); /** * Resets a bit at a specific position, i.e., sets it to 0. * @param i The bit position. * @return A reference to the bit vector instance. */ - BitVector& reset(size_type i); + BitVector& Reset(size_type i); /** * Sets all bits to 0. * @return A reference to the bit vector instance. */ - BitVector& reset(); + BitVector& Reset(); /** * Toggles/flips a bit at a specific position. * @param i The bit position. * @return A reference to the bit vector instance. */ - BitVector& flip(size_type i); + BitVector& Flip(size_type i); /** * Computes the complement. * @return A reference to the bit vector instance. */ - BitVector& flip(); + BitVector& Flip(); /** Retrieves a single bit. * @param i The bit position. @@ -217,32 +217,32 @@ public: * count* or *Hamming weight*. * @return The number of bits set to 1. */ - size_type count() const; + size_type Count() const; /** * Retrieves the number of blocks of the underlying storage. - * @param The number of blocks that represent `size()` bits. 
+ * @param The number of blocks that represent `Size()` bits. */ - size_type blocks() const; + size_type Blocks() const; /** * Retrieves the number of bits the bitvector consist of. * @return The length of the bit vector in bits. */ - size_type size() const; + size_type Size() const; /** * Checks whether the bit vector is empty. * @return `true` iff the bitvector has zero length. */ - bool empty() const; + bool Empty() const; /** * Finds the bit position of of the first 1-bit. * @return The position of the first bit that equals to one or `npos` if no * such bit exists. */ - size_type find_first() const; + size_type FindFirst() const; /** * Finds the next 1-bit from a given starting position. @@ -252,7 +252,7 @@ public: * @return The position of the first bit that equals to 1 after position * *i* or `npos` if no such bit exists. */ - size_type find_next(size_type i) const; + size_type FindNext(size_type i) const; bool Serialize(SerialInfo* info) const; static BitVector* Unserialize(UnserialInfo* info); From a5572dd66f10ca653855483e0941da327b8422e4 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Tue, 4 Jun 2013 14:31:39 -0700 Subject: [PATCH 011/118] Write CounterVector implementation scaffold. --- src/BloomFilter.cc | 36 ++++++++++++++++++++++++++++++++++++ src/BloomFilter.h | 10 +++++++--- 2 files changed, 43 insertions(+), 3 deletions(-) diff --git a/src/BloomFilter.cc b/src/BloomFilter.cc index 4787bef0f0..78048ee588 100644 --- a/src/BloomFilter.cc +++ b/src/BloomFilter.cc @@ -10,6 +10,42 @@ T round(double x) { return (x > 0.0) ? (x + 0.5) : (x - 0.5); } } // namespace +CounterVector::CounterVector(size_t width, size_t cells) + : bits_(new BitVector(width * cells)), width_(width) + { + } + +CounterVector::~CounterVector() + { + delete bits_; + } + +bool CounterVector::Increment(size_type cell, count_type value) + { + // TODO + assert(! "not yet implemented"); + return false; + } + +bool CounterVector::Decrement(size_type cell, count_type value) + { + // TODO + assert(! "not yet implemented"); + return false; + } + +CounterVector::count_type CounterVector::Count(size_type cell) const + { + // TODO + assert(! "not yet implemented"); + return 0; + } + +CounterVector::size_type CounterVector::Size() const + { + return bits_->Blocks() / width_; + } + IMPLEMENT_SERIAL(CounterVector, SER_COUNTERVECTOR) bool CounterVector::DoSerialize(SerialInfo* info) const diff --git a/src/BloomFilter.h b/src/BloomFilter.h index 82948f30ec..b4f82efee9 100644 --- a/src/BloomFilter.h +++ b/src/BloomFilter.h @@ -9,7 +9,7 @@ /** * A vector of counters, each of which have a fixed number of bits. */ -class CounterVector : SerialObj { +class CounterVector : public SerialObj { public: typedef size_t size_type; typedef uint64 count_type; @@ -18,8 +18,12 @@ public: * Constructs a counter vector having cells of a given width. * * @param width The number of bits that each cell occupies. + * + * @param cells The number of cells in the bitvector. */ - explicit CounterVector(unsigned width); + CounterVector(size_t width, size_t cells = 1024); + + ~CounterVector(); /** * Increments a given cell. @@ -68,7 +72,7 @@ protected: CounterVector() { } private: - BitVector bits_; + BitVector* bits_; unsigned width_; }; From 751cf612931f021ddf7b5ee51019f20d05e0c309 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Tue, 4 Jun 2013 15:30:27 -0700 Subject: [PATCH 012/118] Add more serialization implementation. 
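This serializes a CounterVector as its underlying bit vector plus the per-cell width; the Increment()/Count() scaffolding from the previous commit is still stubbed out. For intuition, a cell of width w occupies bits [cell*w, cell*w + w) of the flat bit vector, so a read or a saturating increment would look roughly like the sketch below. This is only an assumption about how the scaffold might be filled in, not code from this series:

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    typedef std::vector<bool> Bits;   // stand-in for the patch's BitVector

    // Read the w-bit counter stored at `cell` (assumes w < 64).
    static uint64_t cell_count(const Bits& bits, size_t w, size_t cell)
        {
        uint64_t value = 0;
        for ( size_t i = 0; i < w; ++i )
            if ( bits[cell * w + i] )
                value |= uint64_t(1) << i;
        return value;
        }

    // Increment the counter at `cell`, refusing to wrap past its maximum.
    static bool cell_increment(Bits& bits, size_t w, size_t cell)
        {
        uint64_t max = (uint64_t(1) << w) - 1;
        uint64_t value = cell_count(bits, w, cell);
        if ( value == max )
            return false;   // counter saturated
        ++value;
        for ( size_t i = 0; i < w; ++i )
            bits[cell * w + i] = (value >> i) & 1;
        return true;
        }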
--- src/BloomFilter.cc | 93 ++++++++++++++++++++++++++++++++-------------- src/BloomFilter.h | 56 +++++++++++++++++++++++----- src/NetVar.h | 1 + src/OpaqueVal.cc | 18 ++++++--- src/OpaqueVal.h | 1 + src/SerialTypes.h | 2 + 6 files changed, 129 insertions(+), 42 deletions(-) diff --git a/src/BloomFilter.cc b/src/BloomFilter.cc index 78048ee588..64f0e1c67b 100644 --- a/src/BloomFilter.cc +++ b/src/BloomFilter.cc @@ -46,12 +46,23 @@ CounterVector::size_type CounterVector::Size() const return bits_->Blocks() / width_; } +bool CounterVector::Serialize(SerialInfo* info) const + { + return SerialObj::Serialize(info); + } + +CounterVector* CounterVector::Unserialize(UnserialInfo* info) + { + return reinterpret_cast( + SerialObj::Unserialize(info, SER_COUNTERVECTOR)); + } + IMPLEMENT_SERIAL(CounterVector, SER_COUNTERVECTOR) bool CounterVector::DoSerialize(SerialInfo* info) const { DO_SERIALIZE(SER_COUNTERVECTOR, SerialObj); - if ( ! SERIALIZE(&bits_) ) + if ( ! SERIALIZE(bits_) ) return false; return SERIALIZE(static_cast(width_)); } @@ -60,9 +71,9 @@ bool CounterVector::DoUnserialize(UnserialInfo* info) { DO_UNSERIALIZE(SerialObj); return false; - // TODO: Ask Robin how to unserialize non-pointer members. - //if ( ! UNSERIALIZE(&bits_) ) - // return false; + bits_ = BitVector::Unserialize(info); + if ( ! bits_ ) + return false; uint64 width; if ( ! UNSERIALIZE(&width) ) return false; @@ -90,6 +101,18 @@ HashPolicy::HashVector DoubleHashing::Hash(const void* x, size_t n) const return h; } + +BloomFilter::BloomFilter(size_t k) + : hash_(new hash_policy(k)) + { + } + +BloomFilter::~BloomFilter() + { + if ( hash_ ) + delete hash_; + } + bool BloomFilter::Serialize(SerialInfo* info) const { return SerialObj::Serialize(info); @@ -101,24 +124,21 @@ BloomFilter* BloomFilter::Unserialize(UnserialInfo* info) SerialObj::Unserialize(info, SER_BLOOMFILTER)); } -// FIXME: should abstract base classes also have IMPLEMENT_SERIAL? -//IMPLEMENT_SERIAL(BloomFilter, SER_BLOOMFILTER) - bool BloomFilter::DoSerialize(SerialInfo* info) const { DO_SERIALIZE(SER_BLOOMFILTER, SerialObj); - // TODO: Make the hash policy serializable. - //if ( ! SERIALIZE(hash_) ) - // return false; - return SERIALIZE(static_cast(elements_)); + if ( ! SERIALIZE(static_cast(hash_->K())) ) + return false; + return SERIALIZE(static_cast(elements_)); } bool BloomFilter::DoUnserialize(UnserialInfo* info) { DO_UNSERIALIZE(SerialObj); - // TODO: Make the hash policy serializable. - //if ( ! hash_ = HashPolicy::Unserialize(info) ) - // return false; + uint16 k; + if ( ! 
UNSERIALIZE(&k) ) + return false; + hash_ = new hash_policy(static_cast(k)); uint64 elements; if ( UNSERIALIZE(&elements) ) return false; @@ -126,7 +146,7 @@ bool BloomFilter::DoUnserialize(UnserialInfo* info) return true; } -size_t BasicBloomFilter::Cells(double fp, size_t capacity) +size_t BasicBloomFilter::M(double fp, size_t capacity) { double ln2 = std::log(2); return std::ceil(-(capacity * std::log(fp) / ln2 / ln2)); @@ -138,9 +158,16 @@ size_t BasicBloomFilter::K(size_t cells, size_t capacity) return round(frac * std::log(2)); } -BasicBloomFilter::BasicBloomFilter(size_t cells, HashPolicy* hash) - : BloomFilter(hash), bits_(cells) +BasicBloomFilter::BasicBloomFilter(double fp, size_t capacity) + : BloomFilter(K(M(fp, capacity), capacity)) { + bits_ = new BitVector(M(fp, capacity)); + } + +BasicBloomFilter::BasicBloomFilter(size_t cells, size_t capacity) + : BloomFilter(K(cells, capacity)) + { + bits_ = new BitVector(cells); } IMPLEMENT_SERIAL(BasicBloomFilter, SER_BASICBLOOMFILTER) @@ -148,38 +175,50 @@ IMPLEMENT_SERIAL(BasicBloomFilter, SER_BASICBLOOMFILTER) bool BasicBloomFilter::DoSerialize(SerialInfo* info) const { DO_SERIALIZE(SER_BASICBLOOMFILTER, BloomFilter); - // TODO: Make the hash policy serializable. - //if ( ! SERIALIZE(&bits_) ) - // return false; - return true; + return SERIALIZE(bits_); } bool BasicBloomFilter::DoUnserialize(UnserialInfo* info) { DO_UNSERIALIZE(BloomFilter); - // TODO: Non-pointer member deserialization? - return true; + bits_ = BitVector::Unserialize(info); + return bits_ == NULL; } void BasicBloomFilter::AddImpl(const HashPolicy::HashVector& h) { for ( size_t i = 0; i < h.size(); ++i ) - bits_.set(h[i] % h.size()); + bits_->Set(h[i] % h.size()); } size_t BasicBloomFilter::CountImpl(const HashPolicy::HashVector& h) const { for ( size_t i = 0; i < h.size(); ++i ) - if ( ! bits_[h[i] % h.size()] ) + if ( ! (*bits_)[h[i] % h.size()] ) return 0; return 1; } +IMPLEMENT_SERIAL(CountingBloomFilter, SER_COUNTINGBLOOMFILTER) + +bool CountingBloomFilter::DoSerialize(SerialInfo* info) const + { + DO_SERIALIZE(SER_BASICBLOOMFILTER, BloomFilter); + return SERIALIZE(cells_); + } + +bool CountingBloomFilter::DoUnserialize(UnserialInfo* info) + { + DO_UNSERIALIZE(BloomFilter); + cells_ = CounterVector::Unserialize(info); + return cells_ == NULL; + } + void CountingBloomFilter::AddImpl(const HashPolicy::HashVector& h) { for ( size_t i = 0; i < h.size(); ++i ) - cells_.Increment(h[i] % h.size(), 1); + cells_->Increment(h[i] % h.size(), 1); } size_t CountingBloomFilter::CountImpl(const HashPolicy::HashVector& h) const @@ -188,7 +227,7 @@ size_t CountingBloomFilter::CountImpl(const HashPolicy::HashVector& h) const std::numeric_limits::max(); for ( size_t i = 0; i < h.size(); ++i ) { - CounterVector::size_type cnt = cells_.Count(h[i] % h.size()); + CounterVector::size_type cnt = cells_->Count(h[i] % h.size()); if ( cnt < min ) min = cnt; } diff --git a/src/BloomFilter.h b/src/BloomFilter.h index b4f82efee9..77c6bc4f56 100644 --- a/src/BloomFilter.h +++ b/src/BloomFilter.h @@ -151,9 +151,13 @@ private: /** * The abstract base class for Bloom filters. */ -class BloomFilter : SerialObj { +class BloomFilter : public SerialObj { public: - virtual ~BloomFilter() { delete hash_; } + // At this point we won't let the user choose the hash policy, but we might + // open up the interface in the future. + typedef DoubleHashing hash_policy; + + virtual ~BloomFilter(); /** * Adds an element of type T to the Bloom filter. 
@@ -193,10 +197,10 @@ public: static BloomFilter* Unserialize(UnserialInfo* info); protected: - DECLARE_SERIAL(BloomFilter); + DECLARE_ABSTRACT_SERIAL(BloomFilter); BloomFilter() { }; - BloomFilter(HashPolicy* hash) : hash_(hash) { } + BloomFilter(size_t k); virtual void AddImpl(const HashPolicy::HashVector& hashes) = 0; virtual size_t CountImpl(const HashPolicy::HashVector& hashes) const = 0; @@ -211,10 +215,42 @@ private: */ class BasicBloomFilter : public BloomFilter { public: - static size_t Cells(double fp, size_t capacity); + /** + * Computes the number of cells based a given false-positive rate and + * capacity. In the literature, this parameter often has the name *M*. + * + * @param fp The false-positive rate. + * + * @param capacity The number of exepected elements. + * + * Returns: The number cells needed to support a false-positive rate of *fp* + * with at most *capacity* elements. + */ + static size_t M(double fp, size_t capacity); + + /** + * Computes the optimal number of hash functions based on the number cells + * and expected number of elements. + * + * @param cells The number of cells (*m*). + * + * @param capacity The maximum number of elements. + * + * Returns: the optimal number of hash functions for a false-positive rate of + * *fp* for at most *capacity* elements. + */ static size_t K(size_t cells, size_t capacity); - BasicBloomFilter(size_t cells, HashPolicy* hash); + /** + * Constructs a basic Bloom filter with a given false-positive rate and + * capacity. + */ + BasicBloomFilter(double fp, size_t capacity); + + /** + * Constructs a basic Bloom filter with a given number of cells and capacity. + */ + BasicBloomFilter(size_t cells, size_t capacity); protected: DECLARE_SERIAL(BasicBloomFilter); @@ -225,7 +261,7 @@ protected: virtual size_t CountImpl(const HashPolicy::HashVector& h) const; private: - BitVector bits_; + BitVector* bits_; }; /** @@ -233,18 +269,18 @@ private: */ class CountingBloomFilter : public BloomFilter { public: - CountingBloomFilter(unsigned width, HashPolicy* hash); + CountingBloomFilter(unsigned width); protected: DECLARE_SERIAL(CountingBloomFilter); - CountingBloomFilter(); + CountingBloomFilter() { } virtual void AddImpl(const HashPolicy::HashVector& h); virtual size_t CountImpl(const HashPolicy::HashVector& h) const; private: - CounterVector cells_; + CounterVector* cells_; }; #endif diff --git a/src/NetVar.h b/src/NetVar.h index 1a20adcaf2..aa2a14ada5 100644 --- a/src/NetVar.h +++ b/src/NetVar.h @@ -249,6 +249,7 @@ extern OpaqueType* md5_type; extern OpaqueType* sha1_type; extern OpaqueType* sha256_type; extern OpaqueType* entropy_type; +extern OpaqueType* bloomfilter_type; // Initializes globals that don't pertain to network/event analysis. extern void init_general_global_var(); diff --git a/src/OpaqueVal.cc b/src/OpaqueVal.cc index a5fb65f53b..b4f1290436 100644 --- a/src/OpaqueVal.cc +++ b/src/OpaqueVal.cc @@ -518,23 +518,31 @@ bool EntropyVal::DoUnserialize(UnserialInfo* info) return true; } +BloomFilterVal::BloomFilterVal() : OpaqueVal(bloomfilter_type) + { + } + BloomFilterVal::BloomFilterVal(OpaqueType* t) : OpaqueVal(t) { } +BloomFilterVal::~BloomFilterVal() + { + if ( bloom_filter_ ) + delete bloom_filter_; + } + IMPLEMENT_SERIAL(BloomFilterVal, SER_BLOOMFILTER_VAL); bool BloomFilterVal::DoSerialize(SerialInfo* info) const { DO_SERIALIZE(SER_BLOOMFILTER_VAL, OpaqueVal); - // TODO: implement. 
- return true; + return SERIALIZE(bloom_filter_); } bool BloomFilterVal::DoUnserialize(UnserialInfo* info) { DO_UNSERIALIZE(OpaqueVal); - // TODO: implement. - return true; + bloom_filter_ = BloomFilter::Unserialize(info); + return bloom_filter_ == NULL; } - diff --git a/src/OpaqueVal.h b/src/OpaqueVal.h index 1c9c0361cc..68b42a8a49 100644 --- a/src/OpaqueVal.h +++ b/src/OpaqueVal.h @@ -112,6 +112,7 @@ private: class BloomFilterVal : public OpaqueVal { public: BloomFilterVal(); + ~BloomFilterVal(); protected: friend class Val; diff --git a/src/SerialTypes.h b/src/SerialTypes.h index 171113ab6a..859145f19f 100644 --- a/src/SerialTypes.h +++ b/src/SerialTypes.h @@ -53,6 +53,7 @@ SERIAL_IS(BITVECTOR, 0x1500) SERIAL_IS(COUNTERVECTOR, 0xa000) SERIAL_IS(BLOOMFILTER, 0xa100) SERIAL_IS(BASICBLOOMFILTER, 0xa200) +SERIAL_IS(COUNTINGBLOOMFILTER, 0xa300) // These are the externally visible types. const SerialType SER_NONE = 0; @@ -211,5 +212,6 @@ SERIAL_CONST2(BITVECTOR) SERIAL_CONST2(COUNTERVECTOR) SERIAL_CONST2(BLOOMFILTER) SERIAL_CONST2(BASICBLOOMFILTER) +SERIAL_CONST2(COUNTINGBLOOMFILTER) #endif From 880d02f7204d21fc0e69f08ac78e963042df4f16 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Wed, 5 Jun 2013 16:16:55 -0700 Subject: [PATCH 013/118] Associate a Comphash with a BloomFilterVal. We also keep track of the Bloom filter's element type inside each value. The first use of the BiF bloomfilter_add will "typify" the Bloom filter and lock the Bloom filter's type to the element type. --- src/BloomFilter.cc | 15 ++++++++++++ src/BloomFilter.h | 3 ++- src/OpaqueVal.cc | 60 ++++++++++++++++++++++++++++++++++++++++++++-- src/OpaqueVal.h | 18 ++++++++++++-- 4 files changed, 91 insertions(+), 5 deletions(-) diff --git a/src/BloomFilter.cc b/src/BloomFilter.cc index 64f0e1c67b..74fa6fb255 100644 --- a/src/BloomFilter.cc +++ b/src/BloomFilter.cc @@ -199,6 +199,21 @@ size_t BasicBloomFilter::CountImpl(const HashPolicy::HashVector& h) const return 1; } +CountingBloomFilter::CountingBloomFilter(double fp, size_t capacity, + size_t width) + : BloomFilter(BasicBloomFilter::K(BasicBloomFilter::M(fp, capacity), + capacity)) + { + cells_ = new CounterVector(width, BasicBloomFilter::M(fp, capacity)); + } + +CountingBloomFilter::CountingBloomFilter(size_t cells, size_t capacity, + size_t width) + : BloomFilter(BasicBloomFilter::K(cells, capacity)) + { + cells_ = new CounterVector(width, cells); + } + IMPLEMENT_SERIAL(CountingBloomFilter, SER_COUNTINGBLOOMFILTER) diff --git a/src/BloomFilter.h b/src/BloomFilter.h index 77c6bc4f56..14b0ac3281 100644 --- a/src/BloomFilter.h +++ b/src/BloomFilter.h @@ -269,7 +269,8 @@ private: */ class CountingBloomFilter : public BloomFilter { public: - CountingBloomFilter(unsigned width); + CountingBloomFilter(double fp, size_t capacity, size_t width); + CountingBloomFilter(size_t cells, size_t capacity, size_t width); protected: DECLARE_SERIAL(CountingBloomFilter); diff --git a/src/OpaqueVal.cc b/src/OpaqueVal.cc index b4f1290436..abfd8f320f 100644 --- a/src/OpaqueVal.cc +++ b/src/OpaqueVal.cc @@ -518,31 +518,87 @@ bool EntropyVal::DoUnserialize(UnserialInfo* info) return true; } -BloomFilterVal::BloomFilterVal() : OpaqueVal(bloomfilter_type) +BloomFilterVal::BloomFilterVal(BloomFilter* bf) + : OpaqueVal(bloomfilter_type), bloom_filter_(bf) { } -BloomFilterVal::BloomFilterVal(OpaqueType* t) : OpaqueVal(t) +BloomFilterVal::BloomFilterVal(OpaqueType* t) + : OpaqueVal(t) { } +bool BloomFilterVal::Typify(BroType* type) + { + if ( type_ ) + return false; + type_ = type; + TypeList* 
tl = new TypeList(type_); + tl->Append(type_); + hash_ = new CompositeHash(tl); + Unref(tl); + return true; + } + +BroType* BloomFilterVal::Type() const + { + return type_; + } + +void BloomFilterVal::Add(const Val* val) + { + HashKey* key = hash_->ComputeHash(val, 1); + bloom_filter_->Add(key->Hash()); + } + +size_t BloomFilterVal::Count(const Val* val) const + { + HashKey* key = hash_->ComputeHash(val, 1); + return bloom_filter_->Count(key->Hash()); + } + +BloomFilterVal* BloomFilterVal::Merge(const BloomFilterVal* first, + const BloomFilterVal* second) +{ + assert(! "not yet implemented"); + return NULL; + } + BloomFilterVal::~BloomFilterVal() { + if ( type_ ) + Unref(type_); + if ( hash_ ) + delete hash_; if ( bloom_filter_ ) delete bloom_filter_; } +BloomFilterVal::BloomFilterVal() + : OpaqueVal(bloomfilter_type) + { + } + IMPLEMENT_SERIAL(BloomFilterVal, SER_BLOOMFILTER_VAL); bool BloomFilterVal::DoSerialize(SerialInfo* info) const { DO_SERIALIZE(SER_BLOOMFILTER_VAL, OpaqueVal); + if ( ! SERIALIZE(type_) ) + return false; return SERIALIZE(bloom_filter_); } bool BloomFilterVal::DoUnserialize(UnserialInfo* info) { DO_UNSERIALIZE(OpaqueVal); + type_ = BroType::Unserialize(info); + if ( ! type_ ) + return false; + TypeList* tl = new TypeList(type_); + tl->Append(type_); + hash_ = new CompositeHash(tl); + Unref(tl); bloom_filter_ = BloomFilter::Unserialize(info); return bloom_filter_ == NULL; } diff --git a/src/OpaqueVal.h b/src/OpaqueVal.h index 68b42a8a49..e97a530f3a 100644 --- a/src/OpaqueVal.h +++ b/src/OpaqueVal.h @@ -110,18 +110,32 @@ private: }; class BloomFilterVal : public OpaqueVal { + BloomFilterVal(const BloomFilterVal&); + BloomFilterVal& operator=(const BloomFilterVal&); public: - BloomFilterVal(); + static BloomFilterVal* Merge(const BloomFilterVal* first, + const BloomFilterVal* second); + + BloomFilterVal(BloomFilter* bf); ~BloomFilterVal(); + bool Typify(BroType* type); + BroType* Type() const; + + void Add(const Val* val); + size_t Count(const Val* val) const; + protected: friend class Val; + BloomFilterVal(); BloomFilterVal(OpaqueType* t); DECLARE_SERIAL(BloomFilterVal); private: - BloomFilter* bloom_filter_; + BroType* type_; + CompositeHash* hash_; + BloomFilter* bloom_filter_; }; #endif From 3d9764213191070a6b68375c0d0ae8c3193528e3 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Wed, 5 Jun 2013 16:26:16 -0700 Subject: [PATCH 014/118] Add Bloom filter BiFs. --- src/bro.bif | 89 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) diff --git a/src/bro.bif b/src/bro.bif index d9558106a7..60fb985dda 100644 --- a/src/bro.bif +++ b/src/bro.bif @@ -5730,3 +5730,92 @@ function anonymize_addr%(a: addr, cl: IPAddrAnonymizationClass%): addr } %} +# =========================================================================== +# +# Bloom Filter Functions +# +# =========================================================================== + +%%{ +#include "BloomFilter.h" +%%} + +## Initializes a Bloom filter data structure. +## +## fp: The desired false-positive rate. +## +## capacity: the maximum number of elements that guarantees a false-positive +## rate of *fp*. +## +## Returns: A Bloom filter handle. 
+function bloomfilter_init%(fp: double, capacity: count, + max: count &default=1%): opaque of bloomfilter + %{ + BloomFilter* bf; + if ( max == 1 ) + { + bf = new BasicBloomFilter(fp, capacity); + } + else + { + uint16 width = 0; + while ( max >>= 1 ) + ++width; + bf = new CountingBloomFilter(fp, capacity, width); + } + return new BloomFilterVal(bf); + %} + +## Adds an element to a Bloom filter. +## +## bf: The Bloom filter handle. +## +## x: The element to add. +function bloomfilter_add%(bf: opaque of bloomfilter, x: any%): any + %{ + BloomFilterVal* bfv = static_cast(bf); + if ( ! bfv->Type() || ! bfv->Typify(x->Type()) ) + reporter->Error("failed to set Bloom filter type"); + else if ( bfv->Type() != x->Type() ) + reporter->Error("incompatible Bloom filter types"); + bfv->Add(x); + return 0; + %} + +## Retrieves the counter for a given element in a Bloom filter. +## +## bf: The Bloom filter handle. +## +## x: The element to count. +## +## Returns: the counter associated with *x* in *bf*. +function bloomfilter_lookup%(bf: opaque of bloomfilter, x: any%): count + %{ + BloomFilterVal* bfv = static_cast(bf); + if ( ! bfv->Type() ) + reporter->Error("cannot perform lookup on untyped Bloom filter"); + else if ( bfv->Type() != x->Type() ) + reporter->Error("incompatible Bloom filter types"); + return new Val(static_cast(bfv->Count(x)), TYPE_COUNT); + %} + +## Merges two Bloom filters. +## +## bf1: The first Bloom filter handle. +## +## bf2: The second Bloom filter handle. +## +## Returns: The union of *bf1* and *bf2*. +function bloomfilter_merge%(bf1: opaque of bloomfilter, + bf2: opaque of bloomfilter%): opaque of bloomfilter + %{ + const BloomFilterVal* bfv1 = static_cast(bf1); + const BloomFilterVal* bfv2 = static_cast(bf2); + if ( ! bfv1->Type() ) + reporter->Error("The first Bloom filter has not yet been typed"); + if ( ! bfv2->Type() ) + reporter->Error("The second Bloom filter has not yet been typed"); + else if ( bfv1->Type() != bfv2->Type() ) + reporter->Error("incompatible Bloom filter types"); + return BloomFilterVal::Merge(bfv1, bfv2); + %} From d5126a13395f899fab12f081248336e687222ed9 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Wed, 5 Jun 2013 17:45:10 -0700 Subject: [PATCH 015/118] Fix some BiF issues. --- src/bro.bif | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/bro.bif b/src/bro.bif index 60fb985dda..08b532eaea 100644 --- a/src/bro.bif +++ b/src/bro.bif @@ -5774,12 +5774,18 @@ function bloomfilter_init%(fp: double, capacity: count, function bloomfilter_add%(bf: opaque of bloomfilter, x: any%): any %{ BloomFilterVal* bfv = static_cast(bf); - if ( ! bfv->Type() || ! bfv->Typify(x->Type()) ) + if ( ! bfv->Type() && ! bfv->Typify(x->Type()) ) + { reporter->Error("failed to set Bloom filter type"); + return NULL; + } else if ( bfv->Type() != x->Type() ) + { reporter->Error("incompatible Bloom filter types"); + return NULL; + } bfv->Add(x); - return 0; + return NULL; %} ## Retrieves the counter for a given element in a Bloom filter. @@ -5812,9 +5818,9 @@ function bloomfilter_merge%(bf1: opaque of bloomfilter, const BloomFilterVal* bfv1 = static_cast(bf1); const BloomFilterVal* bfv2 = static_cast(bf2); if ( ! bfv1->Type() ) - reporter->Error("The first Bloom filter has not yet been typed"); + reporter->Error("first Bloom filter has not yet been typed"); if ( ! 
bfv2->Type() ) - reporter->Error("The second Bloom filter has not yet been typed"); + reporter->Error("second Bloom filter has not yet been typed"); else if ( bfv1->Type() != bfv2->Type() ) reporter->Error("incompatible Bloom filter types"); return BloomFilterVal::Merge(bfv1, bfv2); From 012e09c5c40bdf0acd29a34bf2271417ed36d770 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Thu, 6 Jun 2013 12:56:46 -0700 Subject: [PATCH 016/118] Small fixes and simplifications. --- src/BloomFilter.cc | 2 +- src/BloomFilter.h | 17 +++++++---------- src/OpaqueVal.cc | 1 + 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/src/BloomFilter.cc b/src/BloomFilter.cc index 74fa6fb255..e549553bf4 100644 --- a/src/BloomFilter.cc +++ b/src/BloomFilter.cc @@ -140,7 +140,7 @@ bool BloomFilter::DoUnserialize(UnserialInfo* info) return false; hash_ = new hash_policy(static_cast(k)); uint64 elements; - if ( UNSERIALIZE(&elements) ) + if ( ! UNSERIALIZE(&elements) ) return false; elements_ = static_cast(elements); return true; diff --git a/src/BloomFilter.h b/src/BloomFilter.h index 14b0ac3281..3e2bd5de90 100644 --- a/src/BloomFilter.h +++ b/src/BloomFilter.h @@ -94,15 +94,14 @@ protected: * A functor that computes a universal hash function. * @tparam Codomain An integral type. */ - template class Hasher { public: - template - Codomain operator()(const Domain& x) const + template + HashType operator()(const T& x) const { return h3_(&x, sizeof(x)); } - Codomain operator()(const void* x, size_t n) const + HashType operator()(const void* x, size_t n) const { return h3_(x, n); } @@ -110,7 +109,7 @@ protected: // FIXME: The hardcoded value of 36 comes from UHASH_KEY_SIZE defined in // Hash.h. I do not know how this value impacts the hash function behavior // so I'll just copy it verbatim. (Matthias) - H3 h3_; + H3 h3_; }; HashPolicy(size_t k) : k_(k) { } @@ -125,12 +124,11 @@ private: class DefaultHashing : public HashPolicy { public: DefaultHashing(size_t k) : HashPolicy(k), hashers_(k) { } - virtual ~DefaultHashing() { } virtual HashVector Hash(const void* x, size_t n) const; private: - std::vector< Hasher > hashers_; + std::vector hashers_; }; /** @@ -139,13 +137,12 @@ private: class DoubleHashing : public HashPolicy { public: DoubleHashing(size_t k) : HashPolicy(k) { } - virtual ~DoubleHashing() { } virtual HashVector Hash(const void* x, size_t n) const; private: - Hasher hasher1_; - Hasher hasher2_; + Hasher hasher1_; + Hasher hasher2_; }; /** diff --git a/src/OpaqueVal.cc b/src/OpaqueVal.cc index abfd8f320f..03a6e51ce8 100644 --- a/src/OpaqueVal.cc +++ b/src/OpaqueVal.cc @@ -533,6 +533,7 @@ bool BloomFilterVal::Typify(BroType* type) if ( type_ ) return false; type_ = type; + type_->Ref(); TypeList* tl = new TypeList(type_); tl->Append(type_); hash_ = new CompositeHash(tl); From f211b856c9ae35e68ea4af194e08157fdefef7e6 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Thu, 6 Jun 2013 13:13:36 -0700 Subject: [PATCH 017/118] Catch invalid values of the false-positive rate. 
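The guard added below rejects rates outside [0, 1] but still lets the endpoints through: fp = 0 sends std::log(fp) to negative infinity when sizing the filter, and fp = 1 collapses it to zero cells. A stricter check is sketched here only as a suggestion; it is not what this patch does:

    #include <cstdio>

    // Accept only false-positive rates strictly between 0 and 1.
    static bool valid_fp_rate(double fp)
        {
        return fp > 0.0 && fp < 1.0;
        }

    int main()
        {
        std::printf("%d %d %d %d\n",
                    valid_fp_rate(0.1),    // 1: usable rate
                    valid_fp_rate(0.0),    // 0: would need infinitely many cells
                    valid_fp_rate(1.0),    // 0: degenerates to a zero-cell filter
                    valid_fp_rate(1.1));   // 0: already rejected by the patch
        return 0;
        }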
--- src/bro.bif | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/bro.bif b/src/bro.bif index 08b532eaea..74219dd2b7 100644 --- a/src/bro.bif +++ b/src/bro.bif @@ -5751,6 +5751,11 @@ function anonymize_addr%(a: addr, cl: IPAddrAnonymizationClass%): addr function bloomfilter_init%(fp: double, capacity: count, max: count &default=1%): opaque of bloomfilter %{ + if ( fp < 0.0 || fp > 1.0 ) + { + reporter->Error("false-positive rate must take value between 0 and 1"); + return NULL; + } BloomFilter* bf; if ( max == 1 ) { From 7ce986e31f59b1f1000ec335a4efc1f0f5e0c011 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Thu, 6 Jun 2013 13:21:27 -0700 Subject: [PATCH 018/118] Fix modding. --- src/BloomFilter.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/BloomFilter.cc b/src/BloomFilter.cc index e549553bf4..7c347927c3 100644 --- a/src/BloomFilter.cc +++ b/src/BloomFilter.cc @@ -188,13 +188,13 @@ bool BasicBloomFilter::DoUnserialize(UnserialInfo* info) void BasicBloomFilter::AddImpl(const HashPolicy::HashVector& h) { for ( size_t i = 0; i < h.size(); ++i ) - bits_->Set(h[i] % h.size()); + bits_->Set(h[i] % bits_->Size()); } size_t BasicBloomFilter::CountImpl(const HashPolicy::HashVector& h) const { for ( size_t i = 0; i < h.size(); ++i ) - if ( ! (*bits_)[h[i] % h.size()] ) + if ( ! (*bits_)[h[i] % bits_->Size()] ) return 0; return 1; } @@ -233,7 +233,7 @@ bool CountingBloomFilter::DoUnserialize(UnserialInfo* info) void CountingBloomFilter::AddImpl(const HashPolicy::HashVector& h) { for ( size_t i = 0; i < h.size(); ++i ) - cells_->Increment(h[i] % h.size(), 1); + cells_->Increment(h[i] % cells_->Size(), 1); } size_t CountingBloomFilter::CountImpl(const HashPolicy::HashVector& h) const @@ -242,7 +242,7 @@ size_t CountingBloomFilter::CountImpl(const HashPolicy::HashVector& h) const std::numeric_limits::max(); for ( size_t i = 0; i < h.size(); ++i ) { - CounterVector::size_type cnt = cells_->Count(h[i] % h.size()); + CounterVector::size_type cnt = cells_->Count(h[i] % cells_->Size()); if ( cnt < min ) min = cnt; } From fcf1807fc8ac320a6c787360e8b78509b58b0a5a Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Thu, 6 Jun 2013 13:39:00 -0700 Subject: [PATCH 019/118] Fix hasher usage and narrow interface. --- src/BloomFilter.cc | 4 ++-- src/BloomFilter.h | 10 +--------- 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/src/BloomFilter.cc b/src/BloomFilter.cc index 7c347927c3..c684c82c0e 100644 --- a/src/BloomFilter.cc +++ b/src/BloomFilter.cc @@ -93,8 +93,8 @@ HashPolicy::HashVector DefaultHashing::Hash(const void* x, size_t n) const HashPolicy::HashVector DoubleHashing::Hash(const void* x, size_t n) const { - HashType h1 = hasher1_(x); - HashType h2 = hasher2_(x); + HashType h1 = hasher1_(x, n); + HashType h2 = hasher2_(x, n); HashVector h(K(), 0); for ( size_t i = 0; i < h.size(); ++i ) h[i] = h1 + i * h2; diff --git a/src/BloomFilter.h b/src/BloomFilter.h index 3e2bd5de90..fd1cb31d61 100644 --- a/src/BloomFilter.h +++ b/src/BloomFilter.h @@ -96,15 +96,7 @@ protected: */ class Hasher { public: - template - HashType operator()(const T& x) const - { - return h3_(&x, sizeof(x)); - } - HashType operator()(const void* x, size_t n) const - { - return h3_(x, n); - } + HashType operator()(const void* x, size_t n) const { return h3_(x, n); } private: // FIXME: The hardcoded value of 36 comes from UHASH_KEY_SIZE defined in // Hash.h. 
I do not know how this value impacts the hash function behavior From 0d299eca57ddab9dfb17c1f6c99139c481dccb49 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Thu, 6 Jun 2013 14:54:25 -0700 Subject: [PATCH 020/118] Correct computation of k hash functions. --- src/BloomFilter.cc | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/BloomFilter.cc b/src/BloomFilter.cc index c684c82c0e..f1db71ae1d 100644 --- a/src/BloomFilter.cc +++ b/src/BloomFilter.cc @@ -3,13 +3,6 @@ #include #include "Serializer.h" -// Backport C++11's std::round(). -namespace { -template -T round(double x) { return (x > 0.0) ? (x + 0.5) : (x - 0.5); } -} // namespace - - CounterVector::CounterVector(size_t width, size_t cells) : bits_(new BitVector(width * cells)), width_(width) { @@ -155,7 +148,7 @@ size_t BasicBloomFilter::M(double fp, size_t capacity) size_t BasicBloomFilter::K(size_t cells, size_t capacity) { double frac = static_cast(cells) / static_cast(capacity); - return round(frac * std::log(2)); + return std::ceil(frac * std::log(2)); } BasicBloomFilter::BasicBloomFilter(double fp, size_t capacity) From e15f03d980e8bb63d00969268056b2e9592b2f85 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Thu, 6 Jun 2013 15:02:11 -0700 Subject: [PATCH 021/118] Cleanup BiFs. --- src/bro.bif | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/src/bro.bif b/src/bro.bif index 5c1280645e..8bd9575498 100644 --- a/src/bro.bif +++ b/src/bro.bif @@ -5026,16 +5026,11 @@ function bloomfilter_add%(bf: opaque of bloomfilter, x: any%): any %{ BloomFilterVal* bfv = static_cast(bf); if ( ! bfv->Type() && ! bfv->Typify(x->Type()) ) - { reporter->Error("failed to set Bloom filter type"); - return NULL; - } else if ( bfv->Type() != x->Type() ) - { reporter->Error("incompatible Bloom filter types"); - return NULL; - } - bfv->Add(x); + else + bfv->Add(x); return NULL; %} @@ -5048,12 +5043,14 @@ function bloomfilter_add%(bf: opaque of bloomfilter, x: any%): any ## Returns: the counter associated with *x* in *bf*. function bloomfilter_lookup%(bf: opaque of bloomfilter, x: any%): count %{ - BloomFilterVal* bfv = static_cast(bf); + const BloomFilterVal* bfv = static_cast(bf); if ( ! bfv->Type() ) reporter->Error("cannot perform lookup on untyped Bloom filter"); else if ( bfv->Type() != x->Type() ) reporter->Error("incompatible Bloom filter types"); - return new Val(static_cast(bfv->Count(x)), TYPE_COUNT); + else + return new Val(static_cast(bfv->Count(x)), TYPE_COUNT); + return new Val(0, TYPE_COUNT); %} ## Merges two Bloom filters. @@ -5068,11 +5065,9 @@ function bloomfilter_merge%(bf1: opaque of bloomfilter, %{ const BloomFilterVal* bfv1 = static_cast(bf1); const BloomFilterVal* bfv2 = static_cast(bf2); - if ( ! bfv1->Type() ) - reporter->Error("first Bloom filter has not yet been typed"); - if ( ! bfv2->Type() ) - reporter->Error("second Bloom filter has not yet been typed"); - else if ( bfv1->Type() != bfv2->Type() ) + if ( bfv1->Type() != bfv2->Type() ) reporter->Error("incompatible Bloom filter types"); - return BloomFilterVal::Merge(bfv1, bfv2); + else + return BloomFilterVal::Merge(bfv1, bfv2); + return NULL; %} From 86becdd6e467fabc475eb81baea6d3586b2d74e7 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Thu, 6 Jun 2013 15:08:24 -0700 Subject: [PATCH 022/118] Add tests. 
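The btest below exercises the feature end-to-end through the script-layer BiFs. An equivalent spot check against the C++ interface from the earlier commits would look something like the following; this is purely illustrative, no such unit test exists in this series:

    #include <cassert>
    #include <cstdint>

    #include "BloomFilter.h"

    int main()
        {
        // Sized for 1000 elements at a 10% false-positive rate.
        BasicBloomFilter bf(0.1, 1000);

        uint64_t x = 42, y = 84, z = 4711;
        bf.Add(x);
        bf.Add(y);

        // Bloom filters have no false negatives.
        assert(bf.Count(x) == 1);
        assert(bf.Count(y) == 1);

        // `z` was never added; 0 is the expected answer, 1 would be one of
        // the false positives the filter permits at a rate of roughly 0.1.
        size_t hit = bf.Count(z);
        assert(hit == 0 || hit == 1);

        return 0;
        }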
--- testing/btest/bifs/bloomfilter.bro | 38 ++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 testing/btest/bifs/bloomfilter.bro diff --git a/testing/btest/bifs/bloomfilter.bro b/testing/btest/bifs/bloomfilter.bro new file mode 100644 index 0000000000..6abbdd69f7 --- /dev/null +++ b/testing/btest/bifs/bloomfilter.bro @@ -0,0 +1,38 @@ +# @TEST-EXEC: bro -b %INPUT >output +# @TEST-EXEC: btest-diff output + +event bro_init() + { + # Basic usage with counts. + local bf_cnt = bloomfilter_init(0.1, 1000); + bloomfilter_add(bf_cnt, 42); + bloomfilter_add(bf_cnt, 84); + bloomfilter_add(bf_cnt, 168); + print bloomfilter_lookup(bf_cnt, 0); + print bloomfilter_lookup(bf_cnt, 42); + print bloomfilter_lookup(bf_cnt, 168); + print bloomfilter_lookup(bf_cnt, 336); + bloomfilter_add(bf_cnt, 0.5); # Type mismatch + bloomfilter_add(bf_cnt, "foo"); # Type mismatch + + # Basic usage with strings. + local bf_str = bloomfilter_init(0.9, 10); + bloomfilter_add(bf_str, "foo"); + bloomfilter_add(bf_str, "bar"); + print bloomfilter_lookup(bf_str, "foo"); + print bloomfilter_lookup(bf_str, "bar"); + print bloomfilter_lookup(bf_str, "baz"); + print bloomfilter_lookup(bf_str, "qux"); + bloomfilter_add(bf_str, 0.5); # Type mismatch + bloomfilter_add(bf_str, 100); # Type mismatch + + # Edge cases. + local bf_edge0 = bloomfilter_init(0.000000000001, 1); + local bf_edge1 = bloomfilter_init(0.00000001, 100000000); + local bf_edge2 = bloomfilter_init(0.9999999, 1); + local bf_edge3 = bloomfilter_init(0.9999999, 100000000000); + + # Invalid parameters. + local bf_bug0 = bloomfilter_init(-0.5, 42); + local bf_bug1 = bloomfilter_init(1.1, 42); + } From f2d536d2da1118b1d5feb143f751d47dc344232b Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Thu, 6 Jun 2013 15:22:04 -0700 Subject: [PATCH 023/118] Add missing initializations. 
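The default constructors touched here exist essentially so the serializer can materialize an object before filling it in, and the cleanup code deletes the pointer members behind a NULL check (for example `if ( hash_ ) delete hash_;`). That check only helps if the pointers actually start out as NULL; left uninitialized they hold indeterminate values, and deleting them is undefined behavior. A generic illustration of the hazard, not framework code:

    #include <cstddef>

    struct Holder
        {
        // Holder() { }              // before: `p` left indeterminate
        Holder() : p(NULL) { }       // after: the pattern this patch applies
        ~Holder()
            {
            if ( p )                 // a useless guard when `p` is garbage
                delete p;
            }
        int* p;
        };

    int main()
        {
        Holder h;   // suppose unserialization fails before `p` is assigned
        return 0;   // destructor runs; safe only because `p` started as NULL
        }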
--- src/BloomFilter.cc | 15 +++++++++++++++ src/BloomFilter.h | 6 +++--- src/OpaqueVal.cc | 25 +++++++++++++++++-------- 3 files changed, 35 insertions(+), 11 deletions(-) diff --git a/src/BloomFilter.cc b/src/BloomFilter.cc index f1db71ae1d..40772fecb6 100644 --- a/src/BloomFilter.cc +++ b/src/BloomFilter.cc @@ -95,6 +95,11 @@ HashPolicy::HashVector DoubleHashing::Hash(const void* x, size_t n) const } +BloomFilter::BloomFilter() + : hash_(NULL) + { + } + BloomFilter::BloomFilter(size_t k) : hash_(new hash_policy(k)) { @@ -151,6 +156,11 @@ size_t BasicBloomFilter::K(size_t cells, size_t capacity) return std::ceil(frac * std::log(2)); } +BasicBloomFilter::BasicBloomFilter() + : bits_(NULL) + { + } + BasicBloomFilter::BasicBloomFilter(double fp, size_t capacity) : BloomFilter(K(M(fp, capacity), capacity)) { @@ -192,6 +202,11 @@ size_t BasicBloomFilter::CountImpl(const HashPolicy::HashVector& h) const return 1; } +CountingBloomFilter::CountingBloomFilter() + : cells_(NULL) + { + } + CountingBloomFilter::CountingBloomFilter(double fp, size_t capacity, size_t width) : BloomFilter(BasicBloomFilter::K(BasicBloomFilter::M(fp, capacity), diff --git a/src/BloomFilter.h b/src/BloomFilter.h index fd1cb31d61..c0101cadf8 100644 --- a/src/BloomFilter.h +++ b/src/BloomFilter.h @@ -188,7 +188,7 @@ public: protected: DECLARE_ABSTRACT_SERIAL(BloomFilter); - BloomFilter() { }; + BloomFilter(); BloomFilter(size_t k); virtual void AddImpl(const HashPolicy::HashVector& hashes) = 0; @@ -244,7 +244,7 @@ public: protected: DECLARE_SERIAL(BasicBloomFilter); - BasicBloomFilter() { } + BasicBloomFilter(); virtual void AddImpl(const HashPolicy::HashVector& h); virtual size_t CountImpl(const HashPolicy::HashVector& h) const; @@ -264,7 +264,7 @@ public: protected: DECLARE_SERIAL(CountingBloomFilter); - CountingBloomFilter() { } + CountingBloomFilter(); virtual void AddImpl(const HashPolicy::HashVector& h); virtual size_t CountImpl(const HashPolicy::HashVector& h) const; diff --git a/src/OpaqueVal.cc b/src/OpaqueVal.cc index 03a6e51ce8..38ea93d000 100644 --- a/src/OpaqueVal.cc +++ b/src/OpaqueVal.cc @@ -518,13 +518,27 @@ bool EntropyVal::DoUnserialize(UnserialInfo* info) return true; } -BloomFilterVal::BloomFilterVal(BloomFilter* bf) - : OpaqueVal(bloomfilter_type), bloom_filter_(bf) +BloomFilterVal::BloomFilterVal() + : OpaqueVal(bloomfilter_type), + type_(NULL), + hash_(NULL), + bloom_filter_(NULL) { } BloomFilterVal::BloomFilterVal(OpaqueType* t) - : OpaqueVal(t) + : OpaqueVal(t), + type_(NULL), + hash_(NULL), + bloom_filter_(NULL) + { + } + +BloomFilterVal::BloomFilterVal(BloomFilter* bf) + : OpaqueVal(bloomfilter_type), + type_(NULL), + hash_(NULL), + bloom_filter_(bf) { } @@ -575,11 +589,6 @@ BloomFilterVal::~BloomFilterVal() delete bloom_filter_; } -BloomFilterVal::BloomFilterVal() - : OpaqueVal(bloomfilter_type) - { - } - IMPLEMENT_SERIAL(BloomFilterVal, SER_BLOOMFILTER_VAL); bool BloomFilterVal::DoSerialize(SerialInfo* info) const From c6381055380f889c4891efcf83da512597ae64d6 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Mon, 10 Jun 2013 12:51:41 -0700 Subject: [PATCH 024/118] Document max parameter in bloomfilter_init. --- src/bro.bif | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/bro.bif b/src/bro.bif index 8bd9575498..9b80c90dbf 100644 --- a/src/bro.bif +++ b/src/bro.bif @@ -4993,6 +4993,13 @@ function anonymize_addr%(a: addr, cl: IPAddrAnonymizationClass%): addr ## capacity: the maximum number of elements that guarantees a false-positive ## rate of *fp*. 
## +## max: The maximum counter value associated with each each element in the +## Bloom filter. If greater than 1, each element in the set has a counter of +## *w = ceil(log_2(max))* bits. Each bit in the underlying bit vector then +## becomes a cell of size *w* bits. Since the number number of cells is a +## function ## of *fp* and *capacity*, it is important to consider the effects +## on space when tuning this value. +## ## Returns: A Bloom filter handle. function bloomfilter_init%(fp: double, capacity: count, max: count &default=1%): opaque of bloomfilter From d25984ba45643be524788b73d7cebc1278a78810 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Mon, 10 Jun 2013 12:55:03 -0700 Subject: [PATCH 025/118] Update baseline for unit tests. --- testing/btest/Baseline/bifs.bloomfilter/output | 8 ++++++++ testing/btest/bifs/bloomfilter.bro | 4 ++-- 2 files changed, 10 insertions(+), 2 deletions(-) create mode 100644 testing/btest/Baseline/bifs.bloomfilter/output diff --git a/testing/btest/Baseline/bifs.bloomfilter/output b/testing/btest/Baseline/bifs.bloomfilter/output new file mode 100644 index 0000000000..65aaa8b07c --- /dev/null +++ b/testing/btest/Baseline/bifs.bloomfilter/output @@ -0,0 +1,8 @@ +0 +1 +1 +0 +1 +1 +1 +1 diff --git a/testing/btest/bifs/bloomfilter.bro b/testing/btest/bifs/bloomfilter.bro index 6abbdd69f7..769cec1200 100644 --- a/testing/btest/bifs/bloomfilter.bro +++ b/testing/btest/bifs/bloomfilter.bro @@ -21,8 +21,8 @@ event bro_init() bloomfilter_add(bf_str, "bar"); print bloomfilter_lookup(bf_str, "foo"); print bloomfilter_lookup(bf_str, "bar"); - print bloomfilter_lookup(bf_str, "baz"); - print bloomfilter_lookup(bf_str, "qux"); + print bloomfilter_lookup(bf_str, "baz"); # FP + print bloomfilter_lookup(bf_str, "qux"); # FP bloomfilter_add(bf_str, 0.5); # Type mismatch bloomfilter_add(bf_str, 100); # Type mismatch From 4c21576c120a0dcc9725308549fd57a8bf9072a1 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Mon, 10 Jun 2013 20:14:34 -0700 Subject: [PATCH 026/118] Add Bloomfilter serialization test code. --- testing/btest/istate/opaque.bro | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/testing/btest/istate/opaque.bro b/testing/btest/istate/opaque.bro index 84818a5e70..ac3b2c0874 100644 --- a/testing/btest/istate/opaque.bro +++ b/testing/btest/istate/opaque.bro @@ -12,6 +12,9 @@ global sha1_handle: opaque of sha1 &persistent &synchronized; global sha256_handle: opaque of sha256 &persistent &synchronized; global entropy_handle: opaque of entropy &persistent &synchronized; +global bloomfilter_elements: set[string] &persistent &synchronized; +global bloomfilter_handle: opaque of bloomfilter &persistent &synchronized; + event bro_done() { local out = open("output.log"); @@ -36,6 +39,9 @@ event bro_done() print out, entropy_test_finish(entropy_handle); else print out, "entropy_test_add() failed"; + + for ( e in bloomfilter_elements ) + print bloomfilter_lookup(bloomfilter_handle, e); } @TEST-END-FILE @@ -47,6 +53,9 @@ global sha1_handle: opaque of sha1 &persistent &synchronized; global sha256_handle: opaque of sha256 &persistent &synchronized; global entropy_handle: opaque of entropy &persistent &synchronized; +global bloomfilter_elements = { "foo", "bar", "baz" } &persistent &synchronized; +global bloomfilter_handle: opaque of bloomfilter &persistent &synchronized; + event bro_init() { local out = open("expected.log"); @@ -72,6 +81,10 @@ event bro_init() entropy_handle = entropy_test_init(); if ( ! 
entropy_test_add(entropy_handle, "f") ) print out, "entropy_test_add() failed"; + + bloomfilter_handle = bloomfilter_init(0.1, 100); + for ( e in bloomfilter_elements ) + bloomfilter_add(bloomfilter_handle, e); } @TEST-END-FILE From 22afbe42dd91e668de8c72417b6a8ff8b544dd99 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Mon, 10 Jun 2013 20:15:13 -0700 Subject: [PATCH 027/118] A number of tweaks of the serialization code. --- src/BitVector.h | 2 +- src/BloomFilter.cc | 17 ++++++++--------- src/BloomFilter.h | 2 +- src/OpaqueVal.cc | 10 ++++++---- src/SerialTypes.h | 8 ++++---- 5 files changed, 20 insertions(+), 19 deletions(-) diff --git a/src/BitVector.h b/src/BitVector.h index 8315a151f0..83fec44a0d 100644 --- a/src/BitVector.h +++ b/src/BitVector.h @@ -8,7 +8,7 @@ /** * A vector of bits. */ -class BitVector : SerialObj { +class BitVector : public SerialObj { public: typedef size_t block_type; typedef size_t size_type; diff --git a/src/BloomFilter.cc b/src/BloomFilter.cc index 40772fecb6..1d73734236 100644 --- a/src/BloomFilter.cc +++ b/src/BloomFilter.cc @@ -55,7 +55,7 @@ IMPLEMENT_SERIAL(CounterVector, SER_COUNTERVECTOR) bool CounterVector::DoSerialize(SerialInfo* info) const { DO_SERIALIZE(SER_COUNTERVECTOR, SerialObj); - if ( ! SERIALIZE(bits_) ) + if ( ! bits_->Serialize(info) ) return false; return SERIALIZE(static_cast(width_)); } @@ -63,14 +63,13 @@ bool CounterVector::DoSerialize(SerialInfo* info) const bool CounterVector::DoUnserialize(UnserialInfo* info) { DO_UNSERIALIZE(SerialObj); - return false; bits_ = BitVector::Unserialize(info); if ( ! bits_ ) return false; uint64 width; if ( ! UNSERIALIZE(&width) ) return false; - width_ = static_cast(width); + width_ = static_cast(width); return true; } @@ -127,7 +126,7 @@ bool BloomFilter::DoSerialize(SerialInfo* info) const DO_SERIALIZE(SER_BLOOMFILTER, SerialObj); if ( ! 
SERIALIZE(static_cast(hash_->K())) ) return false; - return SERIALIZE(static_cast(elements_)); + return SERIALIZE(static_cast(elements_)); } bool BloomFilter::DoUnserialize(UnserialInfo* info) @@ -178,14 +177,14 @@ IMPLEMENT_SERIAL(BasicBloomFilter, SER_BASICBLOOMFILTER) bool BasicBloomFilter::DoSerialize(SerialInfo* info) const { DO_SERIALIZE(SER_BASICBLOOMFILTER, BloomFilter); - return SERIALIZE(bits_); + return bits_->Serialize(info); } bool BasicBloomFilter::DoUnserialize(UnserialInfo* info) { DO_UNSERIALIZE(BloomFilter); bits_ = BitVector::Unserialize(info); - return bits_ == NULL; + return bits_ != NULL; } void BasicBloomFilter::AddImpl(const HashPolicy::HashVector& h) @@ -227,15 +226,15 @@ IMPLEMENT_SERIAL(CountingBloomFilter, SER_COUNTINGBLOOMFILTER) bool CountingBloomFilter::DoSerialize(SerialInfo* info) const { - DO_SERIALIZE(SER_BASICBLOOMFILTER, BloomFilter); - return SERIALIZE(cells_); + DO_SERIALIZE(SER_COUNTINGBLOOMFILTER, BloomFilter); + return cells_->Serialize(info); } bool CountingBloomFilter::DoUnserialize(UnserialInfo* info) { DO_UNSERIALIZE(BloomFilter); cells_ = CounterVector::Unserialize(info); - return cells_ == NULL; + return cells_ != NULL; } void CountingBloomFilter::AddImpl(const HashPolicy::HashVector& h) diff --git a/src/BloomFilter.h b/src/BloomFilter.h index c0101cadf8..4a83ba904b 100644 --- a/src/BloomFilter.h +++ b/src/BloomFilter.h @@ -73,7 +73,7 @@ protected: private: BitVector* bits_; - unsigned width_; + size_t width_; }; /** diff --git a/src/OpaqueVal.cc b/src/OpaqueVal.cc index 38ea93d000..76936dfb78 100644 --- a/src/OpaqueVal.cc +++ b/src/OpaqueVal.cc @@ -574,7 +574,7 @@ size_t BloomFilterVal::Count(const Val* val) const BloomFilterVal* BloomFilterVal::Merge(const BloomFilterVal* first, const BloomFilterVal* second) -{ + { assert(! "not yet implemented"); return NULL; } @@ -594,14 +594,15 @@ IMPLEMENT_SERIAL(BloomFilterVal, SER_BLOOMFILTER_VAL); bool BloomFilterVal::DoSerialize(SerialInfo* info) const { DO_SERIALIZE(SER_BLOOMFILTER_VAL, OpaqueVal); - if ( ! SERIALIZE(type_) ) + if ( ! type_->Serialize(info) ) return false; - return SERIALIZE(bloom_filter_); + return bloom_filter_->Serialize(info); } bool BloomFilterVal::DoUnserialize(UnserialInfo* info) { DO_UNSERIALIZE(OpaqueVal); + type_ = BroType::Unserialize(info); if ( ! type_ ) return false; @@ -609,6 +610,7 @@ bool BloomFilterVal::DoUnserialize(UnserialInfo* info) tl->Append(type_); hash_ = new CompositeHash(tl); Unref(tl); + bloom_filter_ = BloomFilter::Unserialize(info); - return bloom_filter_ == NULL; + return bloom_filter_ != NULL; } diff --git a/src/SerialTypes.h b/src/SerialTypes.h index 859145f19f..9e4aef5b3b 100644 --- a/src/SerialTypes.h +++ b/src/SerialTypes.h @@ -50,10 +50,10 @@ SERIAL_IS_BO(CASE, 0x1200) SERIAL_IS(LOCATION, 0x1300) SERIAL_IS(RE_MATCHER, 0x1400) SERIAL_IS(BITVECTOR, 0x1500) -SERIAL_IS(COUNTERVECTOR, 0xa000) -SERIAL_IS(BLOOMFILTER, 0xa100) -SERIAL_IS(BASICBLOOMFILTER, 0xa200) -SERIAL_IS(COUNTINGBLOOMFILTER, 0xa300) +SERIAL_IS(COUNTERVECTOR, 0x1600) +SERIAL_IS(BLOOMFILTER, 0x1700) +SERIAL_IS(BASICBLOOMFILTER, 0x1800) +SERIAL_IS(COUNTINGBLOOMFILTER, 0x1900) // These are the externally visible types. const SerialType SER_NONE = 0; From 14a701a237dfdd745a842a11f363b93d01926505 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Mon, 10 Jun 2013 22:24:23 -0700 Subject: [PATCH 028/118] Implement value merging. The actual BloomFilter merging still lacks, this is just the first step in the right direction from the user interface side. 
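Roughly speaking, the value-level merge tries each concrete filter type in turn with dynamic_cast and only merges when both operands are of the same concrete type, reporting failure otherwise. A standalone sketch of that dispatch pattern (class names are illustrative, not the Bro types):

    struct Filter { virtual ~Filter() { } };

    struct Basic : Filter {
        static Basic* Merge(const Basic*, const Basic*) { return new Basic(); }
    };

    struct Counting : Filter {
        static Counting* Merge(const Counting*, const Counting*) { return new Counting(); }
    };

    // Attempt a merge as type T; succeeds only if both operands are a T.
    template <typename T>
    static Filter* DoMerge(const Filter* x, const Filter* y)
        {
        const T* a = dynamic_cast<const T*>(x);
        const T* b = dynamic_cast<const T*>(y);
        return a && b ? T::Merge(a, b) : 0;
        }

    Filter* Merge(const Filter* x, const Filter* y)
        {
        if ( Filter* r = DoMerge<Basic>(x, y) )
            return r;
        if ( Filter* r = DoMerge<Counting>(x, y) )
            return r;
        return 0;    // mismatched or unknown filter types
        }

    int main()
        {
        Basic b1, b2;
        Counting c;
        Filter* merged = Merge(&b1, &b2);    // dispatches to Basic::Merge
        Filter* none = Merge(&b1, &c);       // 0: operands differ in type
        delete merged;
        return none == 0 ? 0 : 1;
        }
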
--- src/BloomFilter.cc | 27 ++++++++++++++++++++------- src/BloomFilter.h | 18 ++++++------------ src/OpaqueVal.cc | 17 ++++++++++++++--- src/OpaqueVal.h | 17 ++++++++++++++--- 4 files changed, 54 insertions(+), 25 deletions(-) diff --git a/src/BloomFilter.cc b/src/BloomFilter.cc index 1d73734236..e55db71e46 100644 --- a/src/BloomFilter.cc +++ b/src/BloomFilter.cc @@ -124,9 +124,7 @@ BloomFilter* BloomFilter::Unserialize(UnserialInfo* info) bool BloomFilter::DoSerialize(SerialInfo* info) const { DO_SERIALIZE(SER_BLOOMFILTER, SerialObj); - if ( ! SERIALIZE(static_cast(hash_->K())) ) - return false; - return SERIALIZE(static_cast(elements_)); + return SERIALIZE(static_cast(hash_->K())); } bool BloomFilter::DoUnserialize(UnserialInfo* info) @@ -136,10 +134,6 @@ bool BloomFilter::DoUnserialize(UnserialInfo* info) if ( ! UNSERIALIZE(&k) ) return false; hash_ = new hash_policy(static_cast(k)); - uint64 elements; - if ( ! UNSERIALIZE(&elements) ) - return false; - elements_ = static_cast(elements); return true; } @@ -155,6 +149,17 @@ size_t BasicBloomFilter::K(size_t cells, size_t capacity) return std::ceil(frac * std::log(2)); } +BasicBloomFilter* BasicBloomFilter::Merge(const BasicBloomFilter* x, + const BasicBloomFilter* y) + { + BasicBloomFilter* result = new BasicBloomFilter(); + result->bits_ = new BitVector(*x->bits_ | *y->bits_); + // TODO: implement the hasher pool and make sure the new result gets the same + // number of (equal) hash functions. + //assert(x->hash_ == y->hash_); + return result; + } + BasicBloomFilter::BasicBloomFilter() : bits_(NULL) { @@ -201,6 +206,14 @@ size_t BasicBloomFilter::CountImpl(const HashPolicy::HashVector& h) const return 1; } + +CountingBloomFilter* CountingBloomFilter::Merge(const CountingBloomFilter* x, + const CountingBloomFilter* y) +{ + assert(! "not yet implemented"); + return NULL; +} + CountingBloomFilter::CountingBloomFilter() : cells_(NULL) { diff --git a/src/BloomFilter.h b/src/BloomFilter.h index 4a83ba904b..3b5d9efa71 100644 --- a/src/BloomFilter.h +++ b/src/BloomFilter.h @@ -155,7 +155,6 @@ public: template void Add(const T& x) { - ++elements_; AddImpl(hash_->Hash(&x, sizeof(x))); } @@ -172,16 +171,6 @@ public: return CountImpl(hash_->Hash(&x, sizeof(x))); } - /** - * Retrieves the number of elements added to the Bloom filter. - * - * @return The number of elements in this Bloom filter. - */ - size_t Size() const - { - return elements_; - } - bool Serialize(SerialInfo* info) const; static BloomFilter* Unserialize(UnserialInfo* info); @@ -196,7 +185,6 @@ protected: private: HashPolicy* hash_; - size_t elements_; }; /** @@ -230,6 +218,9 @@ public: */ static size_t K(size_t cells, size_t capacity); + static BasicBloomFilter* Merge(const BasicBloomFilter* x, + const BasicBloomFilter* y); + /** * Constructs a basic Bloom filter with a given false-positive rate and * capacity. 
@@ -258,6 +249,9 @@ private: */ class CountingBloomFilter : public BloomFilter { public: + static CountingBloomFilter* Merge(const CountingBloomFilter* x, + const CountingBloomFilter* y); + CountingBloomFilter(double fp, size_t capacity, size_t width); CountingBloomFilter(size_t cells, size_t capacity, size_t width); diff --git a/src/OpaqueVal.cc b/src/OpaqueVal.cc index 76936dfb78..9dd5c7f980 100644 --- a/src/OpaqueVal.cc +++ b/src/OpaqueVal.cc @@ -572,10 +572,21 @@ size_t BloomFilterVal::Count(const Val* val) const return bloom_filter_->Count(key->Hash()); } -BloomFilterVal* BloomFilterVal::Merge(const BloomFilterVal* first, - const BloomFilterVal* second) +BloomFilterVal* BloomFilterVal::Merge(const BloomFilterVal* x, + const BloomFilterVal* y) { - assert(! "not yet implemented"); + if ( x->Type() != y->Type() ) + { + reporter->InternalError("cannot merge Bloom filters with different types"); + return NULL; + } + + BloomFilterVal* result; + if ( (result = DoMerge(x, y)) ) + return result; + else if ( (result = DoMerge(x, y)) ) + return result; + return NULL; } diff --git a/src/OpaqueVal.h b/src/OpaqueVal.h index e97a530f3a..4b45cad519 100644 --- a/src/OpaqueVal.h +++ b/src/OpaqueVal.h @@ -113,10 +113,10 @@ class BloomFilterVal : public OpaqueVal { BloomFilterVal(const BloomFilterVal&); BloomFilterVal& operator=(const BloomFilterVal&); public: - static BloomFilterVal* Merge(const BloomFilterVal* first, - const BloomFilterVal* second); + static BloomFilterVal* Merge(const BloomFilterVal* x, + const BloomFilterVal* y); - BloomFilterVal(BloomFilter* bf); + explicit BloomFilterVal(BloomFilter* bf); ~BloomFilterVal(); bool Typify(BroType* type); @@ -133,6 +133,17 @@ protected: DECLARE_SERIAL(BloomFilterVal); private: + template + static BloomFilterVal* DoMerge(const BloomFilterVal* x, + const BloomFilterVal* y) + { + const T* a = dynamic_cast(x->bloom_filter_); + const T* b = dynamic_cast(y->bloom_filter_); + if ( a && b ) + return new BloomFilterVal(T::Merge(a, b)); + return NULL; + } + BroType* type_; CompositeHash* hash_; BloomFilter* bloom_filter_; From 1f90b539a8574eeadd4b20ae9f379b0fe08999be Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Thu, 13 Jun 2013 23:06:01 -0700 Subject: [PATCH 029/118] Make H3 class adhere to Bro coding style. 
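For readers unfamiliar with H3: it is a tabulation hash, where each input bit conceptually owns a fixed random word and the digest is the XOR of the words belonging to the set bits; precomputing a 256-entry table per byte position turns that into a single lookup per input byte (and also answers the in-code question, since a byte with no set bits always maps to 0). A condensed, self-contained illustration of the table construction for a single byte position (not the actual template):

    #include <climits>
    #include <cstdio>
    #include <cstdlib>

    int main()
        {
        unsigned long bit_table[CHAR_BIT];
        unsigned long byte_table[UCHAR_MAX + 1];

        // One random word per input bit of this byte position.
        for ( unsigned bit = 0; bit < CHAR_BIT; ++bit )
            bit_table[bit] = std::rand();

        // A byte's entry is the XOR of the words of its set bits, so entry 0
        // is always 0.
        for ( unsigned val = 0; val <= UCHAR_MAX; ++val )
            {
            byte_table[val] = 0;
            for ( unsigned bit = 0; bit < CHAR_BIT; ++bit )
                if ( val & (1u << bit) )
                    byte_table[val] ^= bit_table[bit];
            }

        // Hashing one input byte is now a single table lookup.
        unsigned char byte = 'x';
        std::printf("%lx %lx\n", byte_table[byte], byte_table[0]);
        return 0;
        }
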
--- src/H3.h | 89 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 44 insertions(+), 45 deletions(-) diff --git a/src/H3.h b/src/H3.h index 72d81d519f..50afda5688 100644 --- a/src/H3.h +++ b/src/H3.h @@ -65,53 +65,52 @@ template class H3 { T byte_lookup[N][H3_BYTE_RANGE]; public: - H3(); - T operator()(const void* data, size_t size, size_t offset = 0) const - { - const unsigned char *p = static_cast(data); - T result = 0; + H3() + { + T bit_lookup[N * CHAR_BIT]; - // loop optmized with Duff's Device - register unsigned n = (size + 7) / 8; - switch (size % 8) { - case 0: do { result ^= byte_lookup[offset++][*p++]; - case 7: result ^= byte_lookup[offset++][*p++]; - case 6: result ^= byte_lookup[offset++][*p++]; - case 5: result ^= byte_lookup[offset++][*p++]; - case 4: result ^= byte_lookup[offset++][*p++]; - case 3: result ^= byte_lookup[offset++][*p++]; - case 2: result ^= byte_lookup[offset++][*p++]; - case 1: result ^= byte_lookup[offset++][*p++]; - } while (--n > 0); - } + for ( size_t bit = 0; bit < N * CHAR_BIT; bit++ ) + { + bit_lookup[bit] = 0; + for ( size_t i = 0; i < sizeof(T)/2; i++ ) + // assume random() returns at least 16 random bits + bit_lookup[bit] = (bit_lookup[bit] << 16) | (bro_random() & 0xFFFF); + } - return result; - } + for ( size_t byte = 0; byte < N; byte++ ) + { + for ( unsigned val = 0; val < H3_BYTE_RANGE; val++ ) + { + byte_lookup[byte][val] = 0; + for ( size_t bit = 0; bit < CHAR_BIT; bit++ ) + // Does this mean byte_lookup[*][0] == 0? -RP + if (val & (1 << bit)) + byte_lookup[byte][val] ^= bit_lookup[byte*CHAR_BIT+bit]; + } + } + } + + T operator()(const void* data, size_t size, size_t offset = 0) const + { + const unsigned char *p = static_cast(data); + T result = 0; + + // loop optmized with Duff's Device + register unsigned n = (size + 7) / 8; + switch (size % 8) { + case 0: do { result ^= byte_lookup[offset++][*p++]; + case 7: result ^= byte_lookup[offset++][*p++]; + case 6: result ^= byte_lookup[offset++][*p++]; + case 5: result ^= byte_lookup[offset++][*p++]; + case 4: result ^= byte_lookup[offset++][*p++]; + case 3: result ^= byte_lookup[offset++][*p++]; + case 2: result ^= byte_lookup[offset++][*p++]; + case 1: result ^= byte_lookup[offset++][*p++]; + } while (--n > 0); + } + + return result; + } }; -template -H3::H3() -{ - T bit_lookup[N * CHAR_BIT]; - - for (size_t bit = 0; bit < N * CHAR_BIT; bit++) { - bit_lookup[bit] = 0; - for (size_t i = 0; i < sizeof(T)/2; i++) { - // assume random() returns at least 16 random bits - bit_lookup[bit] = (bit_lookup[bit] << 16) | (bro_random() & 0xFFFF); - } - } - - for (size_t byte = 0; byte < N; byte++) { - for (unsigned val = 0; val < H3_BYTE_RANGE; val++) { - byte_lookup[byte][val] = 0; - for (size_t bit = 0; bit < CHAR_BIT; bit++) { - // Does this mean byte_lookup[*][0] == 0? -RP - if (val & (1 << bit)) - byte_lookup[byte][val] ^= bit_lookup[byte*CHAR_BIT+bit]; - } - } - } -} - #endif //H3_H From 529d12037672d34fd4d1ba5f0d291fd6214f41d4 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Thu, 13 Jun 2013 23:07:31 -0700 Subject: [PATCH 030/118] Make H3 seed configurable. 
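With the seed argument, two H3 instances constructed from the same seed build identical lookup tables and therefore hash identically, while omitting the argument keeps the old behavior of seeding from bro_random(). A hypothetical usage sketch, assuming a 64-bit digest type, the 36-byte key size noted in BloomFilter.h, and that H3.h's surrounding includes make bro_random() visible:

    #include "H3.h"

    bool seeds_are_deterministic()
        {
        H3<unsigned long long, 36> a(42);    // explicit seed: reproducible
        H3<unsigned long long, 36> b(42);
        H3<unsigned long long, 36> c;        // default: seeded from bro_random()
        (void) c;

        static const char key[] = "example";
        return a(key, sizeof(key) - 1) == b(key, sizeof(key) - 1);    // true
        }
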
--- src/H3.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/H3.h b/src/H3.h index 50afda5688..11b0cd79a5 100644 --- a/src/H3.h +++ b/src/H3.h @@ -65,7 +65,7 @@ template class H3 { T byte_lookup[N][H3_BYTE_RANGE]; public: - H3() + H3(T seed = bro_random()) { T bit_lookup[N * CHAR_BIT]; @@ -74,7 +74,7 @@ public: bit_lookup[bit] = 0; for ( size_t i = 0; i < sizeof(T)/2; i++ ) // assume random() returns at least 16 random bits - bit_lookup[bit] = (bit_lookup[bit] << 16) | (bro_random() & 0xFFFF); + bit_lookup[bit] = (bit_lookup[bit] << 16) | (seed & 0xFFFF); } for ( size_t byte = 0; byte < N; byte++ ) From a6d7b7856e87c3a15ba7009ccfb7d6550d1dcfcc Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Thu, 13 Jun 2013 23:12:00 -0700 Subject: [PATCH 031/118] Update H3 documentation (and minor style nits.) --- src/H3.h | 60 +++++++++++++++++++++++++++++--------------------------- 1 file changed, 31 insertions(+), 29 deletions(-) diff --git a/src/H3.h b/src/H3.h index 11b0cd79a5..2eda14d276 100644 --- a/src/H3.h +++ b/src/H3.h @@ -49,9 +49,9 @@ // hash a substring of the data. Hashes of substrings can be bitwise-XOR'ed // together to get the same result as hashing the full string. // Any number of hash functions can be created by creating new instances of H3, -// with the same or different template parameters. The hash function is -// randomly generated using bro_random(); you must call init_random_seed() -// before the H3 constructor if you wish to seed it. +// with the same or different template parameters. The hash function +// constructor takes a seed as argument which defaults to a call to +// bro_random(). #ifndef H3_H @@ -62,34 +62,34 @@ // The number of values representable by a byte. #define H3_BYTE_RANGE (UCHAR_MAX+1) -template class H3 { - T byte_lookup[N][H3_BYTE_RANGE]; +template +class H3 { public: - H3(T seed = bro_random()) + H3(T seed = bro_random()) + { + T bit_lookup[N * CHAR_BIT]; + + for ( size_t bit = 0; bit < N * CHAR_BIT; bit++ ) { - T bit_lookup[N * CHAR_BIT]; - - for ( size_t bit = 0; bit < N * CHAR_BIT; bit++ ) - { - bit_lookup[bit] = 0; - for ( size_t i = 0; i < sizeof(T)/2; i++ ) - // assume random() returns at least 16 random bits - bit_lookup[bit] = (bit_lookup[bit] << 16) | (seed & 0xFFFF); - } - - for ( size_t byte = 0; byte < N; byte++ ) - { - for ( unsigned val = 0; val < H3_BYTE_RANGE; val++ ) - { - byte_lookup[byte][val] = 0; - for ( size_t bit = 0; bit < CHAR_BIT; bit++ ) - // Does this mean byte_lookup[*][0] == 0? -RP - if (val & (1 << bit)) - byte_lookup[byte][val] ^= bit_lookup[byte*CHAR_BIT+bit]; - } - } + bit_lookup[bit] = 0; + for ( size_t i = 0; i < sizeof(T)/2; i++ ) + // assume random() returns at least 16 random bits + bit_lookup[bit] = (bit_lookup[bit] << 16) | (seed & 0xFFFF); } + for ( size_t byte = 0; byte < N; byte++ ) + { + for ( unsigned val = 0; val < H3_BYTE_RANGE; val++ ) + { + byte_lookup[byte][val] = 0; + for ( size_t bit = 0; bit < CHAR_BIT; bit++ ) + // Does this mean byte_lookup[*][0] == 0? 
-RP + if (val & (1 << bit)) + byte_lookup[byte][val] ^= bit_lookup[byte*CHAR_BIT+bit]; + } + } + } + T operator()(const void* data, size_t size, size_t offset = 0) const { const unsigned char *p = static_cast(data); @@ -97,7 +97,7 @@ public: // loop optmized with Duff's Device register unsigned n = (size + 7) / 8; - switch (size % 8) { + switch ( size % 8 ) { case 0: do { result ^= byte_lookup[offset++][*p++]; case 7: result ^= byte_lookup[offset++][*p++]; case 6: result ^= byte_lookup[offset++][*p++]; @@ -106,11 +106,13 @@ public: case 3: result ^= byte_lookup[offset++][*p++]; case 2: result ^= byte_lookup[offset++][*p++]; case 1: result ^= byte_lookup[offset++][*p++]; - } while (--n > 0); + } while ( --n > 0 ); } return result; } +private: + T byte_lookup[N][H3_BYTE_RANGE]; }; #endif //H3_H From d2d8aff81456413597b09b71557b0caabdb7af3d Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Fri, 14 Jun 2013 09:22:48 -0700 Subject: [PATCH 032/118] Add utility function to access first random seed. --- src/util.cc | 13 +++++++++++++ src/util.h | 5 +++++ 2 files changed, 18 insertions(+) diff --git a/src/util.cc b/src/util.cc index de9bd5b679..721ee10a7e 100644 --- a/src/util.cc +++ b/src/util.cc @@ -716,6 +716,8 @@ static bool write_random_seeds(const char* write_file, uint32 seed, static bool bro_rand_determistic = false; static unsigned int bro_rand_state = 0; +static bool first_seed_saved = false; +static unsigned int first_seed = 0; static void bro_srandom(unsigned int seed, bool deterministic) { @@ -800,6 +802,12 @@ void init_random_seed(uint32 seed, const char* read_file, const char* write_file bro_srandom(seed, seeds_done); + if ( ! first_seed_saved ) + { + first_seed = seed; + first_seed_saved = true; + } + if ( ! hmac_key_set ) { MD5((const u_char*) buf, sizeof(buf), shared_hmac_md5_key); @@ -811,6 +819,11 @@ void init_random_seed(uint32 seed, const char* read_file, const char* write_file write_file); } +unsigned int initial_seed() + { + return first_seed; +} + bool have_random_seed() { return bro_rand_determistic; diff --git a/src/util.h b/src/util.h index 49bcbf318b..c3eebb04e3 100644 --- a/src/util.h +++ b/src/util.h @@ -165,6 +165,11 @@ extern void hmac_md5(size_t size, const unsigned char* bytes, extern void init_random_seed(uint32 seed, const char* load_file, const char* write_file); +// Retrieves the initial seed computed after the very first call to +// init_random_seed(). Repeated calls to init_random_seed() will not affect the +// return value of this function. +unsigned int initial_seed(); + // Returns true if the user explicitly set a seed via init_random_seed(); extern bool have_random_seed(); From 1576239f67ef2641135f95bdd331f3c1a54ee5ad Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Fri, 14 Jun 2013 10:19:39 -0700 Subject: [PATCH 033/118] Support seeding for hashers. 
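Seeding the Hasher is what lets the hashing policies build families of related but independent hash functions from a handful of seeds; the double-hashing policy in this code, for instance, combines two seeded hashers into k digests via h_i = h1 + i * h2. A small self-contained sketch of that combination step (types and constants are illustrative):

    #include <cstdio>
    #include <vector>

    typedef unsigned long long hash_type;

    // h_i = h1 + i * h2, the scheme DoubleHashing::Hash() uses to derive k
    // hash values from two independently seeded digests.
    std::vector<hash_type> combine(hash_type h1, hash_type h2, unsigned k)
        {
        std::vector<hash_type> h(k, 0);
        for ( unsigned i = 0; i < k; ++i )
            h[i] = h1 + i * h2;
        return h;
        }

    int main()
        {
        std::vector<hash_type> hashes = combine(0x9e3779b9ULL, 0x85ebca6bULL, 4);
        for ( unsigned i = 0; i < hashes.size(); ++i )
            std::printf("h[%u] = %llx\n", i, hashes[i]);
        return 0;
        }
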
--- src/BloomFilter.cc | 11 +++++++++++ src/BloomFilter.h | 4 +++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/src/BloomFilter.cc b/src/BloomFilter.cc index e55db71e46..eff7eee733 100644 --- a/src/BloomFilter.cc +++ b/src/BloomFilter.cc @@ -74,6 +74,17 @@ bool CounterVector::DoUnserialize(UnserialInfo* info) } +HashPolicy::Hasher::Hasher(size_t seed) + : h3_(seed) +{ +} + +HashPolicy::HashType +HashPolicy::Hasher::operator()(const void* x, size_t n) const + { + return h3_(x, n); + } + HashPolicy::HashVector DefaultHashing::Hash(const void* x, size_t n) const { HashVector h(K(), 0); diff --git a/src/BloomFilter.h b/src/BloomFilter.h index 3b5d9efa71..65133621f9 100644 --- a/src/BloomFilter.h +++ b/src/BloomFilter.h @@ -96,7 +96,9 @@ protected: */ class Hasher { public: - HashType operator()(const void* x, size_t n) const { return h3_(x, n); } + Hasher(size_t seed); + + HashType operator()(const void* x, size_t n) const; private: // FIXME: The hardcoded value of 36 comes from UHASH_KEY_SIZE defined in // Hash.h. I do not know how this value impacts the hash function behavior From 79a6a26f9f70a937551a94a5dc83b2c5dafe1414 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Fri, 14 Jun 2013 10:20:33 -0700 Subject: [PATCH 034/118] H3 does not check for zero length input. --- src/BloomFilter.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/BloomFilter.cc b/src/BloomFilter.cc index eff7eee733..6a44defc6d 100644 --- a/src/BloomFilter.cc +++ b/src/BloomFilter.cc @@ -82,7 +82,7 @@ HashPolicy::Hasher::Hasher(size_t seed) HashPolicy::HashType HashPolicy::Hasher::operator()(const void* x, size_t n) const { - return h3_(x, n); + return n == 0 ? 0 : h3_(x, n); } HashPolicy::HashVector DefaultHashing::Hash(const void* x, size_t n) const From 9f740642891664ee8f482285523969793d0063d0 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Mon, 17 Jun 2013 14:02:14 -0700 Subject: [PATCH 035/118] Expose Bro's linear congruence PRNG as utility function. It was previously not possible to crank the wheel on the PRNG in a deterministic way without affecting the globally unique seed. The new extra utility function bro_prng takes a state in the form of a long int and returns the new PRNG state, now allowing arbitrary code parts to use the random number functionality. This commit also fixes a problem in the H3 constructor, which requires use of multiple seeds. The single seed passed in now serves as seed to crank out as many value needed using bro_prng. --- src/H3.h | 1 + src/util.cc | 29 ++++++++++++++++++----------- src/util.h | 7 +++++-- 3 files changed, 24 insertions(+), 13 deletions(-) diff --git a/src/H3.h b/src/H3.h index 2eda14d276..e2dc865147 100644 --- a/src/H3.h +++ b/src/H3.h @@ -72,6 +72,7 @@ public: for ( size_t bit = 0; bit < N * CHAR_BIT; bit++ ) { bit_lookup[bit] = 0; + seed = bro_prng(seed); for ( size_t i = 0; i < sizeof(T)/2; i++ ) // assume random() returns at least 16 random bits bit_lookup[bit] = (bit_lookup[bit] << 16) | (seed & 0xFFFF); diff --git a/src/util.cc b/src/util.cc index 721ee10a7e..cdd257d94f 100644 --- a/src/util.cc +++ b/src/util.cc @@ -829,22 +829,29 @@ bool have_random_seed() return bro_rand_determistic; } +long int bro_prng(long int state) + { + // Use our own simple linear congruence PRNG to make sure we are + // predictable across platforms. 
+ static const long int m = 2147483647; + static const long int a = 16807; + const long int q = m / a; + const long int r = m % a; + + state = a * ( state % q ) - r * ( state / q ); + + if ( state <= 0 ) + state += m; + + return state; + } + long int bro_random() { if ( ! bro_rand_determistic ) return random(); // Use system PRNG. - // Use our own simple linear congruence PRNG to make sure we are - // predictable across platforms. - const long int m = 2147483647; - const long int a = 16807; - const long int q = m / a; - const long int r = m % a; - - bro_rand_state = a * ( bro_rand_state % q ) - r * ( bro_rand_state / q ); - - if ( bro_rand_state <= 0 ) - bro_rand_state += m; + bro_rand_state = bro_prng(bro_rand_state); return bro_rand_state; } diff --git a/src/util.h b/src/util.h index c3eebb04e3..0af401c668 100644 --- a/src/util.h +++ b/src/util.h @@ -173,9 +173,12 @@ unsigned int initial_seed(); // Returns true if the user explicitly set a seed via init_random_seed(); extern bool have_random_seed(); +// A simple linear congruence PRNG. It takes its state as argument and returns +// a new random value, which can serve as state for subsequent calls. +long int bro_prng(long int state); + // Replacement for the system random(), to which is normally falls back -// except when a seed has been given. In that case, we use our own -// predictable PRNG. +// except when a seed has been given. In that case, the function bro_prng. long int bro_random(); // Calls the system srandom() function with the given seed if not running From 532fbfb4d27ac9ee733dbcfebccbc91e652d4eb0 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Mon, 17 Jun 2013 16:06:02 -0700 Subject: [PATCH 036/118] Factor implementation and change interface. When constructing a Bloom filter, one now has to pass a HashPolicy instance to it. This separates more clearly the concerns of hashing and Bloom filter management. This commit also changes the interface to initialize Bloom filters: there exist now two initialization functions, one for each type: (1) bloomfilter_basic_init(fp: double, capacity: count, name: string &default=""): opaque of bloomfilter (2) bloomfilter_counting_init(k: count, cells: count, max: count, name: string &default=""): opaque of bloomfilter The BiFs for adding elements and performing lookups remain the same. This essentially gives us "BiF polymorphism" at script land, where the initialization BiF constructs the most derived type while subsequent BiFs adhere to the same interface. The reason why we split up the constructor in this case is that we have not yet derived the math that computes the optimal number of hash functions for counting Bloom filters---users have to explicitly parameterize them for now. 
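For reference, the basic initializer sizes the filter with the textbook formulas: for capacity n and target false-positive rate fp it uses roughly m = -n * ln(fp) / ln(2)^2 cells and k = ceil((m/n) * ln 2) hash functions, which mirrors what BasicBloomFilter::M() and K() compute. A standalone sketch of that arithmetic for the fp = 0.1, capacity = 1000 case from the unit test (illustration only, not the BiF itself):

    #include <cmath>
    #include <cstdio>

    int main()
        {
        double fp = 0.1;      // target false-positive rate
        double n = 1000;      // capacity: expected number of distinct elements
        double ln2 = std::log(2.0);

        double m = std::ceil(- n * std::log(fp) / (ln2 * ln2));    // cells
        double k = std::ceil((m / n) * ln2);                       // hash functions

        // Prints roughly: cells=4793 hash_functions=4
        std::printf("cells=%.0f hash_functions=%.0f\n", m, k);
        return 0;
        }
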
--- src/BloomFilter.cc | 159 +++++--------------------- src/BloomFilter.h | 172 ++++------------------------- src/CMakeLists.txt | 2 + src/CounterVector.cc | 75 +++++++++++++ src/CounterVector.h | 78 +++++++++++++ src/HashPolicy.cc | 72 ++++++++++++ src/HashPolicy.h | 90 +++++++++++++++ src/OpaqueVal.cc | 1 + src/bro.bif | 57 ++++++---- testing/btest/bifs/bloomfilter.bro | 20 ++-- testing/btest/istate/opaque.bro | 2 +- 11 files changed, 409 insertions(+), 319 deletions(-) create mode 100644 src/CounterVector.cc create mode 100644 src/CounterVector.h create mode 100644 src/HashPolicy.cc create mode 100644 src/HashPolicy.h diff --git a/src/BloomFilter.cc b/src/BloomFilter.cc index 6a44defc6d..0be64c18de 100644 --- a/src/BloomFilter.cc +++ b/src/BloomFilter.cc @@ -1,117 +1,16 @@ #include "BloomFilter.h" #include +#include "CounterVector.h" #include "Serializer.h" -CounterVector::CounterVector(size_t width, size_t cells) - : bits_(new BitVector(width * cells)), width_(width) - { - } - -CounterVector::~CounterVector() - { - delete bits_; - } - -bool CounterVector::Increment(size_type cell, count_type value) - { - // TODO - assert(! "not yet implemented"); - return false; - } - -bool CounterVector::Decrement(size_type cell, count_type value) - { - // TODO - assert(! "not yet implemented"); - return false; - } - -CounterVector::count_type CounterVector::Count(size_type cell) const - { - // TODO - assert(! "not yet implemented"); - return 0; - } - -CounterVector::size_type CounterVector::Size() const - { - return bits_->Blocks() / width_; - } - -bool CounterVector::Serialize(SerialInfo* info) const - { - return SerialObj::Serialize(info); - } - -CounterVector* CounterVector::Unserialize(UnserialInfo* info) - { - return reinterpret_cast( - SerialObj::Unserialize(info, SER_COUNTERVECTOR)); - } - -IMPLEMENT_SERIAL(CounterVector, SER_COUNTERVECTOR) - -bool CounterVector::DoSerialize(SerialInfo* info) const - { - DO_SERIALIZE(SER_COUNTERVECTOR, SerialObj); - if ( ! bits_->Serialize(info) ) - return false; - return SERIALIZE(static_cast(width_)); - } - -bool CounterVector::DoUnserialize(UnserialInfo* info) - { - DO_UNSERIALIZE(SerialObj); - bits_ = BitVector::Unserialize(info); - if ( ! bits_ ) - return false; - uint64 width; - if ( ! UNSERIALIZE(&width) ) - return false; - width_ = static_cast(width); - return true; - } - - -HashPolicy::Hasher::Hasher(size_t seed) - : h3_(seed) -{ -} - -HashPolicy::HashType -HashPolicy::Hasher::operator()(const void* x, size_t n) const - { - return n == 0 ? 0 : h3_(x, n); - } - -HashPolicy::HashVector DefaultHashing::Hash(const void* x, size_t n) const - { - HashVector h(K(), 0); - for ( size_t i = 0; i < h.size(); ++i ) - h[i] = hashers_[i](x, n); - return h; - } - - -HashPolicy::HashVector DoubleHashing::Hash(const void* x, size_t n) const - { - HashType h1 = hasher1_(x, n); - HashType h2 = hasher2_(x, n); - HashVector h(K(), 0); - for ( size_t i = 0; i < h.size(); ++i ) - h[i] = h1 + i * h2; - return h; - } - - BloomFilter::BloomFilter() : hash_(NULL) { } -BloomFilter::BloomFilter(size_t k) - : hash_(new hash_policy(k)) +BloomFilter::BloomFilter(const HashPolicy* hash_policy) + : hash_(hash_policy) { } @@ -135,7 +34,11 @@ BloomFilter* BloomFilter::Unserialize(UnserialInfo* info) bool BloomFilter::DoSerialize(SerialInfo* info) const { DO_SERIALIZE(SER_BLOOMFILTER, SerialObj); - return SERIALIZE(static_cast(hash_->K())); + // FIXME: Since we have a fixed hashing policy, we just serialize the + // information needed to reconstruct it. + if ( ! 
SERIALIZE(static_cast(hash_->K())) ) + return false; + return SERIALIZE_STR(hash_->Name().c_str(), hash_->Name().size()); } bool BloomFilter::DoUnserialize(UnserialInfo* info) @@ -144,10 +47,15 @@ bool BloomFilter::DoUnserialize(UnserialInfo* info) uint16 k; if ( ! UNSERIALIZE(&k) ) return false; - hash_ = new hash_policy(static_cast(k)); + const char* name; + if ( ! UNSERIALIZE_STR(&name, 0) ) + return false; + // FIXME: for now Bloom filters always use double hashing. + hash_ = new DefaultHashing(k, name); return true; } + size_t BasicBloomFilter::M(double fp, size_t capacity) { double ln2 = std::log(2); @@ -163,11 +71,9 @@ size_t BasicBloomFilter::K(size_t cells, size_t capacity) BasicBloomFilter* BasicBloomFilter::Merge(const BasicBloomFilter* x, const BasicBloomFilter* y) { + // TODO: Ensure that x and y use the same HashPolicy before proceeding. BasicBloomFilter* result = new BasicBloomFilter(); result->bits_ = new BitVector(*x->bits_ | *y->bits_); - // TODO: implement the hasher pool and make sure the new result gets the same - // number of (equal) hash functions. - //assert(x->hash_ == y->hash_); return result; } @@ -176,16 +82,10 @@ BasicBloomFilter::BasicBloomFilter() { } -BasicBloomFilter::BasicBloomFilter(double fp, size_t capacity) - : BloomFilter(K(M(fp, capacity), capacity)) +BasicBloomFilter::BasicBloomFilter(const HashPolicy* hash_policy, size_t cells) + : BloomFilter(hash_policy), + bits_(new BitVector(cells)) { - bits_ = new BitVector(M(fp, capacity)); - } - -BasicBloomFilter::BasicBloomFilter(size_t cells, size_t capacity) - : BloomFilter(K(cells, capacity)) - { - bits_ = new BitVector(cells); } IMPLEMENT_SERIAL(BasicBloomFilter, SER_BASICBLOOMFILTER) @@ -203,13 +103,13 @@ bool BasicBloomFilter::DoUnserialize(UnserialInfo* info) return bits_ != NULL; } -void BasicBloomFilter::AddImpl(const HashPolicy::HashVector& h) +void BasicBloomFilter::AddImpl(const HashPolicy::hash_vector& h) { for ( size_t i = 0; i < h.size(); ++i ) bits_->Set(h[i] % bits_->Size()); } -size_t BasicBloomFilter::CountImpl(const HashPolicy::HashVector& h) const +size_t BasicBloomFilter::CountImpl(const HashPolicy::hash_vector& h) const { for ( size_t i = 0; i < h.size(); ++i ) if ( ! (*bits_)[h[i] % bits_->Size()] ) @@ -230,17 +130,9 @@ CountingBloomFilter::CountingBloomFilter() { } -CountingBloomFilter::CountingBloomFilter(double fp, size_t capacity, - size_t width) - : BloomFilter(BasicBloomFilter::K(BasicBloomFilter::M(fp, capacity), - capacity)) - { - cells_ = new CounterVector(width, BasicBloomFilter::M(fp, capacity)); - } - -CountingBloomFilter::CountingBloomFilter(size_t cells, size_t capacity, - size_t width) - : BloomFilter(BasicBloomFilter::K(cells, capacity)) +CountingBloomFilter::CountingBloomFilter(const HashPolicy* hash_policy, + size_t cells, size_t width) + : BloomFilter(hash_policy) { cells_ = new CounterVector(width, cells); } @@ -261,18 +153,19 @@ bool CountingBloomFilter::DoUnserialize(UnserialInfo* info) return cells_ != NULL; } -void CountingBloomFilter::AddImpl(const HashPolicy::HashVector& h) +void CountingBloomFilter::AddImpl(const HashPolicy::hash_vector& h) { for ( size_t i = 0; i < h.size(); ++i ) cells_->Increment(h[i] % cells_->Size(), 1); } -size_t CountingBloomFilter::CountImpl(const HashPolicy::HashVector& h) const +size_t CountingBloomFilter::CountImpl(const HashPolicy::hash_vector& h) const { CounterVector::size_type min = std::numeric_limits::max(); for ( size_t i = 0; i < h.size(); ++i ) { + // TODO: Use partitioning. 
CounterVector::size_type cnt = cells_->Count(h[i] % cells_->Size()); if ( cnt < min ) min = cnt; diff --git a/src/BloomFilter.h b/src/BloomFilter.h index 65133621f9..189f4920b7 100644 --- a/src/BloomFilter.h +++ b/src/BloomFilter.h @@ -3,141 +3,9 @@ #include #include "BitVector.h" -#include "Hash.h" -#include "H3.h" +#include "HashPolicy.h" -/** - * A vector of counters, each of which have a fixed number of bits. - */ -class CounterVector : public SerialObj { -public: - typedef size_t size_type; - typedef uint64 count_type; - - /** - * Constructs a counter vector having cells of a given width. - * - * @param width The number of bits that each cell occupies. - * - * @param cells The number of cells in the bitvector. - */ - CounterVector(size_t width, size_t cells = 1024); - - ~CounterVector(); - - /** - * Increments a given cell. - * - * @param cell The cell to increment. - * - * @param value The value to add to the current counter in *cell*. - * - * @return `true` if adding *value* to the counter in *cell* succeeded. - */ - bool Increment(size_type cell, count_type value); - - /** - * Decrements a given cell. - * - * @param cell The cell to decrement. - * - * @param value The value to subtract from the current counter in *cell*. - * - * @return `true` if subtracting *value* from the counter in *cell* succeeded. - */ - bool Decrement(size_type cell, count_type value); - - /** - * Retrieves the counter of a given cell. - * - * @param cell The cell index to retrieve the count for. - * - * @return The counter associated with *cell*. - */ - count_type Count(size_type cell) const; - - /** - * Retrieves the number of cells in the storage. - * - * @return The number of cells. - */ - size_type Size() const; - - bool Serialize(SerialInfo* info) const; - static CounterVector* Unserialize(UnserialInfo* info); - -protected: - DECLARE_SERIAL(CounterVector); - - CounterVector() { } - -private: - BitVector* bits_; - size_t width_; -}; - -/** - * The abstract base class for hash policies that hash elements *k* times. - * @tparam Codomain An integral type. - */ -class HashPolicy { -public: - typedef hash_t HashType; - typedef std::vector HashVector; - - virtual ~HashPolicy() { } - size_t K() const { return k_; } - virtual HashVector Hash(const void* x, size_t n) const = 0; - -protected: - /** - * A functor that computes a universal hash function. - * @tparam Codomain An integral type. - */ - class Hasher { - public: - Hasher(size_t seed); - - HashType operator()(const void* x, size_t n) const; - private: - // FIXME: The hardcoded value of 36 comes from UHASH_KEY_SIZE defined in - // Hash.h. I do not know how this value impacts the hash function behavior - // so I'll just copy it verbatim. (Matthias) - H3 h3_; - }; - - HashPolicy(size_t k) : k_(k) { } - -private: - const size_t k_; -}; - -/** - * The *default* hashing policy. Performs *k* hash function computations. - */ -class DefaultHashing : public HashPolicy { -public: - DefaultHashing(size_t k) : HashPolicy(k), hashers_(k) { } - - virtual HashVector Hash(const void* x, size_t n) const; - -private: - std::vector hashers_; -}; - -/** - * The *double-hashing* policy. Uses a linear combination of two hash functions. - */ -class DoubleHashing : public HashPolicy { -public: - DoubleHashing(size_t k) : HashPolicy(k) { } - - virtual HashVector Hash(const void* x, size_t n) const; - -private: - Hasher hasher1_; - Hasher hasher2_; -}; +class CounterVector; /** * The abstract base class for Bloom filters. 
@@ -146,8 +14,6 @@ class BloomFilter : public SerialObj { public: // At this point we won't let the user choose the hash policy, but we might // open up the interface in the future. - typedef DoubleHashing hash_policy; - virtual ~BloomFilter(); /** @@ -180,13 +46,19 @@ protected: DECLARE_ABSTRACT_SERIAL(BloomFilter); BloomFilter(); - BloomFilter(size_t k); - virtual void AddImpl(const HashPolicy::HashVector& hashes) = 0; - virtual size_t CountImpl(const HashPolicy::HashVector& hashes) const = 0; + /** + * Constructs a Bloom filter. + * + * @param hash_policy The hash policy to use for this Bloom filter. + */ + BloomFilter(const HashPolicy* hash_policy); + + virtual void AddImpl(const HashPolicy::hash_vector& hashes) = 0; + virtual size_t CountImpl(const HashPolicy::hash_vector& hashes) const = 0; private: - HashPolicy* hash_; + const HashPolicy* hash_; }; /** @@ -223,24 +95,18 @@ public: static BasicBloomFilter* Merge(const BasicBloomFilter* x, const BasicBloomFilter* y); - /** - * Constructs a basic Bloom filter with a given false-positive rate and - * capacity. - */ - BasicBloomFilter(double fp, size_t capacity); - /** * Constructs a basic Bloom filter with a given number of cells and capacity. */ - BasicBloomFilter(size_t cells, size_t capacity); + BasicBloomFilter(const HashPolicy* hash_policy, size_t cells); protected: DECLARE_SERIAL(BasicBloomFilter); BasicBloomFilter(); - virtual void AddImpl(const HashPolicy::HashVector& h); - virtual size_t CountImpl(const HashPolicy::HashVector& h) const; + virtual void AddImpl(const HashPolicy::hash_vector& h); + virtual size_t CountImpl(const HashPolicy::hash_vector& h) const; private: BitVector* bits_; @@ -254,16 +120,16 @@ public: static CountingBloomFilter* Merge(const CountingBloomFilter* x, const CountingBloomFilter* y); - CountingBloomFilter(double fp, size_t capacity, size_t width); - CountingBloomFilter(size_t cells, size_t capacity, size_t width); + CountingBloomFilter(const HashPolicy* hash_policy, size_t cells, + size_t width); protected: DECLARE_SERIAL(CountingBloomFilter); CountingBloomFilter(); - virtual void AddImpl(const HashPolicy::HashVector& h); - virtual size_t CountImpl(const HashPolicy::HashVector& h) const; + virtual void AddImpl(const HashPolicy::hash_vector& h); + virtual size_t CountImpl(const HashPolicy::hash_vector& h) const; private: CounterVector* cells_; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 1537bb04b0..f2c7ce6bad 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -255,6 +255,7 @@ set(bro_SRCS ChunkedIO.cc CompHash.cc Conn.cc + CounterVector.cc DFA.cc DbgBreakpoint.cc DbgHelp.cc @@ -278,6 +279,7 @@ set(bro_SRCS Frame.cc Func.cc Hash.cc + HashPolicy.cc ID.cc IntSet.cc IOSource.cc diff --git a/src/CounterVector.cc b/src/CounterVector.cc new file mode 100644 index 0000000000..8ed4c30427 --- /dev/null +++ b/src/CounterVector.cc @@ -0,0 +1,75 @@ +#include "CounterVector.h" + +#include "BitVector.h" +#include "Serializer.h" + +CounterVector::CounterVector(size_t width, size_t cells) + : bits_(new BitVector(width * cells)), width_(width) + { + } + +CounterVector::~CounterVector() + { + delete bits_; + } + +bool CounterVector::Increment(size_type cell, count_type value) + { + // TODO + assert(! "not yet implemented"); + return false; + } + +bool CounterVector::Decrement(size_type cell, count_type value) + { + // TODO + assert(! "not yet implemented"); + return false; + } + +CounterVector::count_type CounterVector::Count(size_type cell) const + { + // TODO + assert(! 
"not yet implemented"); + return 0; + } + +CounterVector::size_type CounterVector::Size() const + { + return bits_->Blocks() / width_; + } + +bool CounterVector::Serialize(SerialInfo* info) const + { + return SerialObj::Serialize(info); + } + +CounterVector* CounterVector::Unserialize(UnserialInfo* info) + { + return reinterpret_cast( + SerialObj::Unserialize(info, SER_COUNTERVECTOR)); + } + +IMPLEMENT_SERIAL(CounterVector, SER_COUNTERVECTOR) + +bool CounterVector::DoSerialize(SerialInfo* info) const + { + DO_SERIALIZE(SER_COUNTERVECTOR, SerialObj); + if ( ! bits_->Serialize(info) ) + return false; + return SERIALIZE(static_cast(width_)); + } + +bool CounterVector::DoUnserialize(UnserialInfo* info) + { + DO_UNSERIALIZE(SerialObj); + bits_ = BitVector::Unserialize(info); + if ( ! bits_ ) + return false; + uint64 width; + if ( ! UNSERIALIZE(&width) ) + return false; + width_ = static_cast(width); + return true; + } + diff --git a/src/CounterVector.h b/src/CounterVector.h new file mode 100644 index 0000000000..ecc8fe90e0 --- /dev/null +++ b/src/CounterVector.h @@ -0,0 +1,78 @@ +#ifndef CounterVector_h +#define CounterVector_h + +#include "SerialObj.h" + +class BitVector; + +/** + * A vector of counters, each of which have a fixed number of bits. + */ +class CounterVector : public SerialObj { +public: + typedef size_t size_type; + typedef uint64 count_type; + + /** + * Constructs a counter vector having cells of a given width. + * + * @param width The number of bits that each cell occupies. + * + * @param cells The number of cells in the bitvector. + */ + CounterVector(size_t width, size_t cells = 1024); + + ~CounterVector(); + + /** + * Increments a given cell. + * + * @param cell The cell to increment. + * + * @param value The value to add to the current counter in *cell*. + * + * @return `true` if adding *value* to the counter in *cell* succeeded. + */ + bool Increment(size_type cell, count_type value); + + /** + * Decrements a given cell. + * + * @param cell The cell to decrement. + * + * @param value The value to subtract from the current counter in *cell*. + * + * @return `true` if subtracting *value* from the counter in *cell* succeeded. + */ + bool Decrement(size_type cell, count_type value); + + /** + * Retrieves the counter of a given cell. + * + * @param cell The cell index to retrieve the count for. + * + * @return The counter associated with *cell*. + */ + count_type Count(size_type cell) const; + + /** + * Retrieves the number of cells in the storage. + * + * @return The number of cells. + */ + size_type Size() const; + + bool Serialize(SerialInfo* info) const; + static CounterVector* Unserialize(UnserialInfo* info); + +protected: + DECLARE_SERIAL(CounterVector); + + CounterVector() { } + +private: + BitVector* bits_; + size_t width_; +}; + +#endif diff --git a/src/HashPolicy.cc b/src/HashPolicy.cc new file mode 100644 index 0000000000..d6fb4f3da4 --- /dev/null +++ b/src/HashPolicy.cc @@ -0,0 +1,72 @@ +#include "HashPolicy.h" + +#include "digest.h" + +Hasher::Hasher(size_t seed, const std::string& extra) + : h_(compute_seed(seed, extra)) + { + } + +Hasher::hash_type Hasher::operator()(const void* x, size_t n) const + { + return n == 0 ? 
0 : h_(x, n); + } + +size_t Hasher::compute_seed(size_t seed, const std::string& extra) + { + u_char digest[SHA256_DIGEST_LENGTH]; + SHA256_CTX ctx; + sha256_init(&ctx); + if ( extra.empty() ) + { + unsigned int first_seed = initial_seed(); + sha256_update(&ctx, &first_seed, sizeof(first_seed)); + } + else + { + sha256_update(&ctx, extra.c_str(), extra.size()); + } + sha256_update(&ctx, &seed, sizeof(seed)); + sha256_final(&ctx, digest); + return *reinterpret_cast(digest); + } + + +HashPolicy::HashPolicy(size_t k, const std::string& name) + : k_(k), name_(name) + { + } + +DefaultHashing::DefaultHashing(size_t k, const std::string& name) + : HashPolicy(k, name) + { + for ( size_t i = 0; i < k; ++i ) + hashers_.push_back(Hasher(i, name)); + } + +HashPolicy::hash_vector DefaultHashing::Hash(const void* x, size_t n) const + { + hash_vector h(K(), 0); + for ( size_t i = 0; i < h.size(); ++i ) + h[i] = hashers_[i](x, n); + return h; + } + +DoubleHashing::DoubleHashing(size_t k, const std::string& name) + : HashPolicy(k, name), + hasher1_(1, name), + hasher2_(2, name) + { + } + +HashPolicy::hash_vector DoubleHashing::Hash(const void* x, size_t n) const + { + hash_type h1 = hasher1_(x, n); + hash_type h2 = hasher2_(x, n); + hash_vector h(K(), 0); + for ( size_t i = 0; i < h.size(); ++i ) + h[i] = h1 + i * h2; + return h; + } + + diff --git a/src/HashPolicy.h b/src/HashPolicy.h new file mode 100644 index 0000000000..4660bc0080 --- /dev/null +++ b/src/HashPolicy.h @@ -0,0 +1,90 @@ +#ifndef HashPolicy_h +#define HashPolicy_h + +#include "Hash.h" +#include "H3.h" + +/** + * A functor that computes a universal hash function. + */ +class Hasher { +public: + typedef hash_t hash_type; + + /** + * Constructs a hasher seeded by a given seed and optionally an extra + * descriptor. + * + * @param seed The seed to use. + * + * @param extra If not `NULL`, the hasher will not mix in the initial seed + * but instead use this NUL-terminated string as additional seed. + */ + Hasher(size_t seed, const std::string& extra = ""); + + /** + * Computes the hash digest of contiguous data. + * + * @param x A pointer to the beginning of the byte sequence to hash. + * + * @param n The length of the sequence pointed to by *x*. + */ + hash_type operator()(const void* x, size_t n) const; + +private: + static size_t compute_seed(size_t seed, const std::string& extra); + + H3 h_; +}; + +/** + * The abstract base class for hash policies that hash elements *k* times. + */ +class HashPolicy { +public: + typedef Hasher::hash_type hash_type; + typedef std::vector hash_vector; + + virtual ~HashPolicy() { } + + virtual hash_vector Hash(const void* x, size_t n) const = 0; + + size_t K() const { return k_; } + const std::string& Name() const { return name_; } + +protected: + HashPolicy(size_t k, const std::string& name); + +private: + const size_t k_; + std::string name_; +}; + +/** + * The default hashing policy. Performs *k* hash function computations. + */ +class DefaultHashing : public HashPolicy { +public: + DefaultHashing(size_t k, const std::string& name); + + virtual hash_vector Hash(const void* x, size_t n) const /* override */; + +private: + std::vector hashers_; +}; + +/** + * The *double-hashing* policy. Uses a linear combination of two hash functions. 
+ */ +class DoubleHashing : public HashPolicy { +public: + DoubleHashing(size_t k, const std::string& name); + + virtual hash_vector Hash(const void* x, size_t n) const; + +private: + Hasher hasher1_; + Hasher hasher2_; +}; + +#endif diff --git a/src/OpaqueVal.cc b/src/OpaqueVal.cc index 9dd5c7f980..8b82916689 100644 --- a/src/OpaqueVal.cc +++ b/src/OpaqueVal.cc @@ -605,6 +605,7 @@ IMPLEMENT_SERIAL(BloomFilterVal, SER_BLOOMFILTER_VAL); bool BloomFilterVal::DoSerialize(SerialInfo* info) const { DO_SERIALIZE(SER_BLOOMFILTER_VAL, OpaqueVal); + assert( type_ ); if ( ! type_->Serialize(info) ) return false; return bloom_filter_->Serialize(info); diff --git a/src/bro.bif b/src/bro.bif index 9b80c90dbf..a89b808888 100644 --- a/src/bro.bif +++ b/src/bro.bif @@ -4986,42 +4986,55 @@ function anonymize_addr%(a: addr, cl: IPAddrAnonymizationClass%): addr #include "BloomFilter.h" %%} -## Initializes a Bloom filter data structure. +## Creates a basic Bloom filter. ## ## fp: The desired false-positive rate. ## ## capacity: the maximum number of elements that guarantees a false-positive ## rate of *fp*. ## -## max: The maximum counter value associated with each each element in the -## Bloom filter. If greater than 1, each element in the set has a counter of -## *w = ceil(log_2(max))* bits. Each bit in the underlying bit vector then -## becomes a cell of size *w* bits. Since the number number of cells is a -## function ## of *fp* and *capacity*, it is important to consider the effects -## on space when tuning this value. +## name: A name that uniquely identifies and seeds the Bloom filter. If empty, +## the initialization will become dependent on the initial seed. ## ## Returns: A Bloom filter handle. -function bloomfilter_init%(fp: double, capacity: count, - max: count &default=1%): opaque of bloomfilter +function bloomfilter_basic_init%(fp: double, capacity: count, + name: string &default=""%): opaque of bloomfilter %{ if ( fp < 0.0 || fp > 1.0 ) { reporter->Error("false-positive rate must take value between 0 and 1"); return NULL; } - BloomFilter* bf; - if ( max == 1 ) - { - bf = new BasicBloomFilter(fp, capacity); - } - else - { - uint16 width = 0; - while ( max >>= 1 ) - ++width; - bf = new CountingBloomFilter(fp, capacity, width); - } - return new BloomFilterVal(bf); + + size_t cells = BasicBloomFilter::M(fp, capacity); + size_t optimal_k = BasicBloomFilter::K(cells, capacity); + const HashPolicy* hp = new DefaultHashing(optimal_k, name->CheckString()); + fprintf(stderr, "constructing Bloom filter with %llu hash fns and %llu cells\n", optimal_k, cells); + return new BloomFilterVal(new BasicBloomFilter(hp, cells)); + %} + +## Creates a counting Bloom filter. +## +## k: The number of hash functions to use. +## +## cells: The number of cells of the underlying counter vector. +## +## max: The maximum counter value associated with each each element described +## by *w = ceil(log_2(max))* bits. Each bit in the underlying counter vector +## becomes a cell of size *w* bits. +## +## name: A name that uniquely identifies and seeds the Bloom filter. If empty, +## the initialization will become dependent on the initial seed. +## +## Returns: A Bloom filter handle. 
+function bloomfilter_counting_init%(k: count, cells: count, max: count, + name: string &default=""%): opaque of bloomfilter + %{ + const HashPolicy* hp = new DefaultHashing(k, name->CheckString()); + uint16 width = 0; + while ( max >>= 1 ) + ++width; + return new BloomFilterVal(new CountingBloomFilter(hp, cells, width)); %} ## Adds an element to a Bloom filter. diff --git a/testing/btest/bifs/bloomfilter.bro b/testing/btest/bifs/bloomfilter.bro index 769cec1200..3ff6a6668e 100644 --- a/testing/btest/bifs/bloomfilter.bro +++ b/testing/btest/bifs/bloomfilter.bro @@ -4,7 +4,7 @@ event bro_init() { # Basic usage with counts. - local bf_cnt = bloomfilter_init(0.1, 1000); + local bf_cnt = bloomfilter_basic_init(0.1, 1000); bloomfilter_add(bf_cnt, 42); bloomfilter_add(bf_cnt, 84); bloomfilter_add(bf_cnt, 168); @@ -16,23 +16,23 @@ event bro_init() bloomfilter_add(bf_cnt, "foo"); # Type mismatch # Basic usage with strings. - local bf_str = bloomfilter_init(0.9, 10); + local bf_str = bloomfilter_basic_init(0.9, 10); bloomfilter_add(bf_str, "foo"); bloomfilter_add(bf_str, "bar"); print bloomfilter_lookup(bf_str, "foo"); print bloomfilter_lookup(bf_str, "bar"); - print bloomfilter_lookup(bf_str, "baz"); # FP - print bloomfilter_lookup(bf_str, "qux"); # FP + print bloomfilter_lookup(bf_str, "b4z"); # FP + print bloomfilter_lookup(bf_str, "quux"); # FP bloomfilter_add(bf_str, 0.5); # Type mismatch bloomfilter_add(bf_str, 100); # Type mismatch # Edge cases. - local bf_edge0 = bloomfilter_init(0.000000000001, 1); - local bf_edge1 = bloomfilter_init(0.00000001, 100000000); - local bf_edge2 = bloomfilter_init(0.9999999, 1); - local bf_edge3 = bloomfilter_init(0.9999999, 100000000000); + local bf_edge0 = bloomfilter_basic_init(0.000000000001, 1); + local bf_edge1 = bloomfilter_basic_init(0.00000001, 100000000); + local bf_edge2 = bloomfilter_basic_init(0.9999999, 1); + local bf_edge3 = bloomfilter_basic_init(0.9999999, 100000000000); # Invalid parameters. - local bf_bug0 = bloomfilter_init(-0.5, 42); - local bf_bug1 = bloomfilter_init(1.1, 42); + local bf_bug0 = bloomfilter_basic_init(-0.5, 42); + local bf_bug1 = bloomfilter_basic_init(1.1, 42); } diff --git a/testing/btest/istate/opaque.bro b/testing/btest/istate/opaque.bro index ac3b2c0874..b387f9d6bc 100644 --- a/testing/btest/istate/opaque.bro +++ b/testing/btest/istate/opaque.bro @@ -82,7 +82,7 @@ event bro_init() if ( ! entropy_test_add(entropy_handle, "f") ) print out, "entropy_test_add() failed"; - bloomfilter_handle = bloomfilter_init(0.1, 100); + bloomfilter_handle = bloomfilter_basic_init(0.1, 100); for ( e in bloomfilter_elements ) bloomfilter_add(bloomfilter_handle, e); } From 85668e7054dd22bc783a620eaf88b04f2e4bb952 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Mon, 17 Jun 2013 16:16:44 -0700 Subject: [PATCH 037/118] Remove lingering debug code. 
--- src/bro.bif | 1 - 1 file changed, 1 deletion(-) diff --git a/src/bro.bif b/src/bro.bif index a89b808888..7c81966317 100644 --- a/src/bro.bif +++ b/src/bro.bif @@ -5009,7 +5009,6 @@ function bloomfilter_basic_init%(fp: double, capacity: count, size_t cells = BasicBloomFilter::M(fp, capacity); size_t optimal_k = BasicBloomFilter::K(cells, capacity); const HashPolicy* hp = new DefaultHashing(optimal_k, name->CheckString()); - fprintf(stderr, "constructing Bloom filter with %llu hash fns and %llu cells\n", optimal_k, cells); return new BloomFilterVal(new BasicBloomFilter(hp, cells)); %} From e6e5f4926f5a850c773af05b51d7004fc4899a7c Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Mon, 17 Jun 2013 16:26:35 -0700 Subject: [PATCH 038/118] Create hash policies through factory. --- src/BloomFilter.cc | 5 +---- src/HashPolicy.cc | 5 +++++ src/HashPolicy.h | 7 +++++++ src/bro.bif | 4 ++-- 4 files changed, 15 insertions(+), 6 deletions(-) diff --git a/src/BloomFilter.cc b/src/BloomFilter.cc index 0be64c18de..59d411d8e2 100644 --- a/src/BloomFilter.cc +++ b/src/BloomFilter.cc @@ -34,8 +34,6 @@ BloomFilter* BloomFilter::Unserialize(UnserialInfo* info) bool BloomFilter::DoSerialize(SerialInfo* info) const { DO_SERIALIZE(SER_BLOOMFILTER, SerialObj); - // FIXME: Since we have a fixed hashing policy, we just serialize the - // information needed to reconstruct it. if ( ! SERIALIZE(static_cast(hash_->K())) ) return false; return SERIALIZE_STR(hash_->Name().c_str(), hash_->Name().size()); @@ -50,8 +48,7 @@ bool BloomFilter::DoUnserialize(UnserialInfo* info) const char* name; if ( ! UNSERIALIZE_STR(&name, 0) ) return false; - // FIXME: for now Bloom filters always use double hashing. - hash_ = new DefaultHashing(k, name); + hash_ = HashPolicy::Create(k, name); return true; } diff --git a/src/HashPolicy.cc b/src/HashPolicy.cc index d6fb4f3da4..7ce754be3c 100644 --- a/src/HashPolicy.cc +++ b/src/HashPolicy.cc @@ -32,6 +32,11 @@ size_t Hasher::compute_seed(size_t seed, const std::string& extra) } +HashPolicy* HashPolicy::Create(size_t k, const std::string& name) + { + return new DefaultHashing(k, name); + } + HashPolicy::HashPolicy(size_t k, const std::string& name) : k_(k), name_(name) { diff --git a/src/HashPolicy.h b/src/HashPolicy.h index 4660bc0080..7bdb968bfe 100644 --- a/src/HashPolicy.h +++ b/src/HashPolicy.h @@ -42,6 +42,13 @@ private: */ class HashPolicy { public: + /** + * Constructs the hashing policy used by the implementation. This factory + * function exists because the HashingPolicy class hierachy is not yet + * serializable. 
+ */ + static HashPolicy* Create(size_t k, const std::string& name); + typedef Hasher::hash_type hash_type; typedef std::vector hash_vector; diff --git a/src/bro.bif b/src/bro.bif index 7c81966317..d0ce066139 100644 --- a/src/bro.bif +++ b/src/bro.bif @@ -5008,7 +5008,7 @@ function bloomfilter_basic_init%(fp: double, capacity: count, size_t cells = BasicBloomFilter::M(fp, capacity); size_t optimal_k = BasicBloomFilter::K(cells, capacity); - const HashPolicy* hp = new DefaultHashing(optimal_k, name->CheckString()); + const HashPolicy* hp = HashPolicy::Create(optimal_k, name->CheckString()); return new BloomFilterVal(new BasicBloomFilter(hp, cells)); %} @@ -5029,7 +5029,7 @@ function bloomfilter_basic_init%(fp: double, capacity: count, function bloomfilter_counting_init%(k: count, cells: count, max: count, name: string &default=""%): opaque of bloomfilter %{ - const HashPolicy* hp = new DefaultHashing(k, name->CheckString()); + const HashPolicy* hp = HashPolicy::Create(k, name->CheckString()); uint16 width = 0; while ( max >>= 1 ) ++width; From 273629de366290f411f381fe5970fc672adf465f Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Tue, 18 Jun 2013 10:23:07 -0700 Subject: [PATCH 039/118] Only serialize Bloom filter type if available. --- src/OpaqueVal.cc | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/src/OpaqueVal.cc b/src/OpaqueVal.cc index 8b82916689..5a673c4a40 100644 --- a/src/OpaqueVal.cc +++ b/src/OpaqueVal.cc @@ -605,9 +605,13 @@ IMPLEMENT_SERIAL(BloomFilterVal, SER_BLOOMFILTER_VAL); bool BloomFilterVal::DoSerialize(SerialInfo* info) const { DO_SERIALIZE(SER_BLOOMFILTER_VAL, OpaqueVal); - assert( type_ ); - if ( ! type_->Serialize(info) ) + + bool is_typed = type_ != NULL; + if ( ! SERIALIZE(is_typed) ) return false; + if ( is_typed && ! type_->Serialize(info) ) + return false; + return bloom_filter_->Serialize(info); } @@ -615,13 +619,16 @@ bool BloomFilterVal::DoUnserialize(UnserialInfo* info) { DO_UNSERIALIZE(OpaqueVal); - type_ = BroType::Unserialize(info); - if ( ! type_ ) + bool is_typed; + if ( ! UNSERIALIZE(&is_typed) ) return false; - TypeList* tl = new TypeList(type_); - tl->Append(type_); - hash_ = new CompositeHash(tl); - Unref(tl); + if ( is_typed ) + { + BroType* type = BroType::Unserialize(info); + if ( ! Typify(type) ) + return false; + Unref(type); + } bloom_filter_ = BloomFilter::Unserialize(info); return bloom_filter_ != NULL; From 5f70452a9ac816346c4e480d8de52b213630b5b7 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Tue, 18 Jun 2013 10:40:00 -0700 Subject: [PATCH 040/118] Small fixes and style tweaks. --- src/BitVector.cc | 2 +- src/BloomFilter.cc | 1 + src/OpaqueVal.h | 4 +--- src/Type.cc | 6 +++--- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/BitVector.cc b/src/BitVector.cc index f029230609..64db32131f 100644 --- a/src/BitVector.cc +++ b/src/BitVector.cc @@ -473,7 +473,7 @@ bool BitVector::DoSerialize(SerialInfo* info) const if ( ! SERIALIZE(static_cast(bits_.size())) ) return false; - for (size_t i = 0; i < bits_.size(); ++i) + for ( size_t i = 0; i < bits_.size(); ++i ) if ( ! SERIALIZE(static_cast(bits_[i])) ) return false; diff --git a/src/BloomFilter.cc b/src/BloomFilter.cc index 59d411d8e2..a7727630f7 100644 --- a/src/BloomFilter.cc +++ b/src/BloomFilter.cc @@ -49,6 +49,7 @@ bool BloomFilter::DoUnserialize(UnserialInfo* info) if ( ! 
UNSERIALIZE_STR(&name, 0) ) return false; hash_ = HashPolicy::Create(k, name); + delete [] name; return true; } diff --git a/src/OpaqueVal.h b/src/OpaqueVal.h index 4b45cad519..2362fdacfc 100644 --- a/src/OpaqueVal.h +++ b/src/OpaqueVal.h @@ -139,9 +139,7 @@ private: { const T* a = dynamic_cast(x->bloom_filter_); const T* b = dynamic_cast(y->bloom_filter_); - if ( a && b ) - return new BloomFilterVal(T::Merge(a, b)); - return NULL; + return a && b ? new BloomFilterVal(T::Merge(a, b)) : NULL; } BroType* type_; diff --git a/src/Type.cc b/src/Type.cc index 6461bf2560..f19de461cd 100644 --- a/src/Type.cc +++ b/src/Type.cc @@ -1311,19 +1311,19 @@ IMPLEMENT_SERIAL(OpaqueType, SER_OPAQUE_TYPE); bool OpaqueType::DoSerialize(SerialInfo* info) const { DO_SERIALIZE(SER_OPAQUE_TYPE, BroType); - return SERIALIZE(name); + return SERIALIZE_STR(name.c_str(), name.size()); } bool OpaqueType::DoUnserialize(UnserialInfo* info) { DO_UNSERIALIZE(BroType); - char const* n; + const char* n; if ( ! UNSERIALIZE_STR(&n, 0) ) return false; - name = n; delete [] n; + return true; } From fef3180942723b4124007b605da7c1d93f8f8ce3 Mon Sep 17 00:00:00 2001 From: Bernhard Amann Date: Tue, 2 Jul 2013 18:54:46 -0700 Subject: [PATCH 041/118] bump sqlite to 3.7.17. --- src/3rdparty/sqlite3.c | 3176 ++++++++++++++++++++++++++++++++++------ src/3rdparty/sqlite3.h | 109 +- 2 files changed, 2846 insertions(+), 439 deletions(-) diff --git a/src/3rdparty/sqlite3.c b/src/3rdparty/sqlite3.c index ba6a30e132..deef460899 100644 --- a/src/3rdparty/sqlite3.c +++ b/src/3rdparty/sqlite3.c @@ -1,9 +1,6 @@ -# define SQLITE_THREADSAFE 2 -# define SQLITE_DEFAULT_MEMSTATUS 0 - /****************************************************************************** ** This file is an amalgamation of many separate C source files from SQLite -** version 3.7.16.2. By combining all the individual C code files into this +** version 3.7.17. By combining all the individual C code files into this ** single large file, the entire code can be compiled as a single translation ** unit. This allows many compilers to do optimizations that would not be ** possible if the files were compiled separately. Performance improvements @@ -365,11 +362,11 @@ ** We support that for legacy. */ #if !defined(SQLITE_THREADSAFE) -#if defined(THREADSAFE) -# define SQLITE_THREADSAFE THREADSAFE -#else -# define SQLITE_THREADSAFE 1 /* IMP: R-07272-22309 */ -#endif +# if defined(THREADSAFE) +# define SQLITE_THREADSAFE THREADSAFE +# else +# define SQLITE_THREADSAFE 1 /* IMP: R-07272-22309 */ +# endif #endif /* @@ -681,9 +678,9 @@ extern "C" { ** [sqlite3_libversion_number()], [sqlite3_sourceid()], ** [sqlite_version()] and [sqlite_source_id()]. 
*/ -#define SQLITE_VERSION "3.7.16.2" -#define SQLITE_VERSION_NUMBER 3007016 -#define SQLITE_SOURCE_ID "2013-04-12 11:52:43 cbea02d93865ce0e06789db95fd9168ebac970c7" +#define SQLITE_VERSION "3.7.17" +#define SQLITE_VERSION_NUMBER 3007017 +#define SQLITE_SOURCE_ID "2013-05-20 00:56:22 118a3b35693b134d56ebd780123b7fd6f1497668" /* ** CAPI3REF: Run-Time Library Version Numbers @@ -999,6 +996,8 @@ SQLITE_API int sqlite3_exec( #define SQLITE_FORMAT 24 /* Auxiliary database format error */ #define SQLITE_RANGE 25 /* 2nd parameter to sqlite3_bind out of range */ #define SQLITE_NOTADB 26 /* File opened that is not a database file */ +#define SQLITE_NOTICE 27 /* Notifications from sqlite3_log() */ +#define SQLITE_WARNING 28 /* Warnings from sqlite3_log() */ #define SQLITE_ROW 100 /* sqlite3_step() has another row ready */ #define SQLITE_DONE 101 /* sqlite3_step() has finished executing */ /* end-of-error-codes */ @@ -1049,6 +1048,7 @@ SQLITE_API int sqlite3_exec( #define SQLITE_IOERR_SHMMAP (SQLITE_IOERR | (21<<8)) #define SQLITE_IOERR_SEEK (SQLITE_IOERR | (22<<8)) #define SQLITE_IOERR_DELETE_NOENT (SQLITE_IOERR | (23<<8)) +#define SQLITE_IOERR_MMAP (SQLITE_IOERR | (24<<8)) #define SQLITE_LOCKED_SHAREDCACHE (SQLITE_LOCKED | (1<<8)) #define SQLITE_BUSY_RECOVERY (SQLITE_BUSY | (1<<8)) #define SQLITE_CANTOPEN_NOTEMPDIR (SQLITE_CANTOPEN | (1<<8)) @@ -1068,6 +1068,8 @@ SQLITE_API int sqlite3_exec( #define SQLITE_CONSTRAINT_TRIGGER (SQLITE_CONSTRAINT | (7<<8)) #define SQLITE_CONSTRAINT_UNIQUE (SQLITE_CONSTRAINT | (8<<8)) #define SQLITE_CONSTRAINT_VTAB (SQLITE_CONSTRAINT | (9<<8)) +#define SQLITE_NOTICE_RECOVER_WAL (SQLITE_NOTICE | (1<<8)) +#define SQLITE_NOTICE_RECOVER_ROLLBACK (SQLITE_NOTICE | (2<<8)) /* ** CAPI3REF: Flags For File Open Operations @@ -1307,6 +1309,9 @@ struct sqlite3_io_methods { void (*xShmBarrier)(sqlite3_file*); int (*xShmUnmap)(sqlite3_file*, int deleteFlag); /* Methods above are valid for version 2 */ + int (*xFetch)(sqlite3_file*, sqlite3_int64 iOfst, int iAmt, void **pp); + int (*xUnfetch)(sqlite3_file*, sqlite3_int64 iOfst, void *p); + /* Methods above are valid for version 3 */ /* Additional methods may be added in future releases */ }; @@ -1443,7 +1448,8 @@ struct sqlite3_io_methods { ** it is able to override built-in [PRAGMA] statements. ** **
  • [[SQLITE_FCNTL_BUSYHANDLER]] -** ^This file-control may be invoked by SQLite on the database file handle +** ^The [SQLITE_FCNTL_BUSYHANDLER] +** file-control may be invoked by SQLite on the database file handle ** shortly after it is opened in order to provide a custom VFS with access ** to the connections busy-handler callback. The argument is of type (void **) ** - an array of two (void *) values. The first (void *) actually points @@ -1454,13 +1460,24 @@ struct sqlite3_io_methods { ** current operation. ** **
  • [[SQLITE_FCNTL_TEMPFILENAME]] -** ^Application can invoke this file-control to have SQLite generate a +** ^Application can invoke the [SQLITE_FCNTL_TEMPFILENAME] file-control +** to have SQLite generate a ** temporary filename using the same algorithm that is followed to generate ** temporary filenames for TEMP tables and other internal uses. The ** argument should be a char** which will be filled with the filename ** written into memory obtained from [sqlite3_malloc()]. The caller should ** invoke [sqlite3_free()] on the result to avoid a memory leak. ** +**
  • [[SQLITE_FCNTL_MMAP_SIZE]] +** The [SQLITE_FCNTL_MMAP_SIZE] file control is used to query or set the +** maximum number of bytes that will be used for memory-mapped I/O. +** The argument is a pointer to a value of type sqlite3_int64 that +** is an advisory maximum number of bytes in the file to memory map. The +** pointer is overwritten with the old value. The limit is not changed if +** the value originally pointed to is negative, and so the current limit +** can be queried by passing in a pointer to a negative number. This +** file-control is used internally to implement [PRAGMA mmap_size]. +** ** */ #define SQLITE_FCNTL_LOCKSTATE 1 @@ -1479,6 +1496,7 @@ struct sqlite3_io_methods { #define SQLITE_FCNTL_PRAGMA 14 #define SQLITE_FCNTL_BUSYHANDLER 15 #define SQLITE_FCNTL_TEMPFILENAME 16 +#define SQLITE_FCNTL_MMAP_SIZE 18 /* ** CAPI3REF: Mutex Handle @@ -2145,7 +2163,9 @@ struct sqlite3_mem_methods { ** page cache implementation into that object.)^ ** ** [[SQLITE_CONFIG_LOG]]
    SQLITE_CONFIG_LOG
    -**
    ^The SQLITE_CONFIG_LOG option takes two arguments: a pointer to a +**
    The SQLITE_CONFIG_LOG option is used to configure the SQLite +** global [error log]. +** (^The SQLITE_CONFIG_LOG option takes two arguments: a pointer to a ** function with a call signature of void(*)(void*,int,const char*), ** and a pointer to void. ^If the function pointer is not NULL, it is ** invoked by [sqlite3_log()] to process each logging event. ^If the @@ -2191,12 +2211,12 @@ struct sqlite3_mem_methods { **
    SQLITE_CONFIG_PCACHE and SQLITE_CONFIG_GETPCACHE **
    These options are obsolete and should not be used by new code. ** They are retained for backwards compatibility but are now no-ops. -** +**
    ** ** [[SQLITE_CONFIG_SQLLOG]] **
    SQLITE_CONFIG_SQLLOG **
    This option is only available if sqlite is compiled with the -** SQLITE_ENABLE_SQLLOG pre-processor macro defined. The first argument should +** [SQLITE_ENABLE_SQLLOG] pre-processor macro defined. The first argument should ** be a pointer to a function of type void(*)(void*,sqlite3*,const char*, int). ** The second should be of type (void*). The callback is invoked by the library ** in three separate circumstances, identified by the value passed as the @@ -2206,7 +2226,23 @@ struct sqlite3_mem_methods { ** fourth parameter is 1, then the SQL statement that the third parameter ** points to has just been executed. Or, if the fourth parameter is 2, then ** the connection being passed as the second parameter is being closed. The -** third parameter is passed NULL In this case. +** third parameter is passed NULL In this case. An example of using this +** configuration option can be seen in the "test_sqllog.c" source file in +** the canonical SQLite source tree.
    +** +** [[SQLITE_CONFIG_MMAP_SIZE]] +**
    SQLITE_CONFIG_MMAP_SIZE +**
    SQLITE_CONFIG_MMAP_SIZE takes two 64-bit integer (sqlite3_int64) values +** that are the default mmap size limit (the default setting for +** [PRAGMA mmap_size]) and the maximum allowed mmap size limit. +** The default setting can be overridden by each database connection using +** either the [PRAGMA mmap_size] command, or by using the +** [SQLITE_FCNTL_MMAP_SIZE] file control. The maximum allowed mmap size +** cannot be changed at run-time. Nor may the maximum allowed mmap size +** exceed the compile-time maximum mmap size set by the +** [SQLITE_MAX_MMAP_SIZE] compile-time option. +** If either argument to this option is negative, then that argument is +** changed to its compile-time default. ** */ #define SQLITE_CONFIG_SINGLETHREAD 1 /* nil */ @@ -2230,6 +2266,7 @@ struct sqlite3_mem_methods { #define SQLITE_CONFIG_GETPCACHE2 19 /* sqlite3_pcache_methods2* */ #define SQLITE_CONFIG_COVERING_INDEX_SCAN 20 /* int */ #define SQLITE_CONFIG_SQLLOG 21 /* xSqllog, void* */ +#define SQLITE_CONFIG_MMAP_SIZE 22 /* sqlite3_int64, sqlite3_int64 */ /* ** CAPI3REF: Database Connection Configuration Options @@ -3063,6 +3100,9 @@ SQLITE_API int sqlite3_set_authorizer( ** as each triggered subprogram is entered. The callbacks for triggers ** contain a UTF-8 SQL comment that identifies the trigger.)^ ** +** The [SQLITE_TRACE_SIZE_LIMIT] compile-time option can be used to limit +** the length of [bound parameter] expansion in the output of sqlite3_trace(). +** ** ^The callback function registered by sqlite3_profile() is invoked ** as each SQL statement finishes. ^The profile callback contains ** the original statement text and an estimate of wall-clock time @@ -3601,7 +3641,8 @@ SQLITE_API int sqlite3_limit(sqlite3*, int id, int newVal); **
  • ** ^If the database schema changes, instead of returning [SQLITE_SCHEMA] as it ** always used to do, [sqlite3_step()] will automatically recompile the SQL -** statement and try to run it again. +** statement and try to run it again. As many as [SQLITE_MAX_SCHEMA_RETRY] +** retries will occur before sqlite3_step() gives up and returns an error. **
  • ** **
  • @@ -3805,6 +3846,9 @@ typedef struct sqlite3_context sqlite3_context; ** parameter [SQLITE_LIMIT_VARIABLE_NUMBER] (default value: 999). ** ** ^The third argument is the value to bind to the parameter. +** ^If the third parameter to sqlite3_bind_text() or sqlite3_bind_text16() +** or sqlite3_bind_blob() is a NULL pointer then the fourth parameter +** is ignored and the end result is the same as sqlite3_bind_null(). ** ** ^(In those routines that have a fourth argument, its value is the ** number of bytes in the parameter. To be clear: the value is the @@ -4761,7 +4805,7 @@ SQLITE_API void sqlite3_set_auxdata(sqlite3_context*, int N, void*, void (*)(voi ** the content before returning. ** ** The typedef is necessary to work around problems in certain -** C++ compilers. See ticket #2191. +** C++ compilers. */ typedef void (*sqlite3_destructor_type)(void*); #define SQLITE_STATIC ((sqlite3_destructor_type)0) @@ -5560,11 +5604,20 @@ SQLITE_API int sqlite3_table_column_metadata( ** ^This interface loads an SQLite extension library from the named file. ** ** ^The sqlite3_load_extension() interface attempts to load an -** SQLite extension library contained in the file zFile. +** [SQLite extension] library contained in the file zFile. If +** the file cannot be loaded directly, attempts are made to load +** with various operating-system specific extensions added. +** So for example, if "samplelib" cannot be loaded, then names like +** "samplelib.so" or "samplelib.dylib" or "samplelib.dll" might +** be tried also. ** ** ^The entry point is zProc. -** ^zProc may be 0, in which case the name of the entry point -** defaults to "sqlite3_extension_init". +** ^(zProc may be 0, in which case SQLite will try to come up with an +** entry point name on its own. It first tries "sqlite3_extension_init". +** If that does not work, it constructs a name "sqlite3_X_init" where the +** X is consists of the lower-case equivalent of all ASCII alphabetic +** characters in the filename from the last "/" to the first following +** "." and omitting any initial "lib".)^ ** ^The sqlite3_load_extension() interface returns ** [SQLITE_OK] on success and [SQLITE_ERROR] if something goes wrong. ** ^If an error occurs and pzErrMsg is not 0, then the @@ -5590,11 +5643,11 @@ SQLITE_API int sqlite3_load_extension( ** CAPI3REF: Enable Or Disable Extension Loading ** ** ^So as not to open security holes in older applications that are -** unprepared to deal with extension loading, and as a means of disabling -** extension loading while evaluating user-entered SQL, the following API +** unprepared to deal with [extension loading], and as a means of disabling +** [extension loading] while evaluating user-entered SQL, the following API ** is provided to turn the [sqlite3_load_extension()] mechanism on and off. ** -** ^Extension loading is off by default. See ticket #1863. +** ^Extension loading is off by default. ** ^Call the sqlite3_enable_load_extension() routine with onoff==1 ** to turn extension loading on and call it with onoff==0 to turn ** it back off again. @@ -5606,7 +5659,7 @@ SQLITE_API int sqlite3_enable_load_extension(sqlite3 *db, int onoff); ** ** ^This interface causes the xEntryPoint() function to be invoked for ** each new [database connection] that is created. The idea here is that -** xEntryPoint() is the entry point for a statically linked SQLite extension +** xEntryPoint() is the entry point for a statically linked [SQLite extension] ** that is to be automatically loaded into all new database connections. 
** ** ^(Even though the function prototype shows that xEntryPoint() takes @@ -7386,10 +7439,25 @@ SQLITE_API int sqlite3_unlock_notify( SQLITE_API int sqlite3_stricmp(const char *, const char *); SQLITE_API int sqlite3_strnicmp(const char *, const char *, int); +/* +** CAPI3REF: String Globbing +* +** ^The [sqlite3_strglob(P,X)] interface returns zero if string X matches +** the glob pattern P, and it returns non-zero if string X does not match +** the glob pattern P. ^The definition of glob pattern matching used in +** [sqlite3_strglob(P,X)] is the same as for the "X GLOB P" operator in the +** SQL dialect used by SQLite. ^The sqlite3_strglob(P,X) function is case +** sensitive. +** +** Note that this routine returns zero on a match and non-zero if the strings +** do not match, the same as [sqlite3_stricmp()] and [sqlite3_strnicmp()]. +*/ +SQLITE_API int sqlite3_strglob(const char *zGlob, const char *zStr); + /* ** CAPI3REF: Error Logging Interface ** -** ^The [sqlite3_log()] interface writes a message into the error log +** ^The [sqlite3_log()] interface writes a message into the [error log] ** established by the [SQLITE_CONFIG_LOG] option to [sqlite3_config()]. ** ^If logging is enabled, the zFormat string and subsequent arguments are ** used with [sqlite3_snprintf()] to generate the final output string. @@ -8074,6 +8142,7 @@ SQLITE_PRIVATE void sqlite3HashClear(Hash*); */ #ifndef SQLITE_TEMP_STORE # define SQLITE_TEMP_STORE 1 +# define SQLITE_TEMP_STORE_xc 1 /* Exclude from ctime.c */ #endif /* @@ -8221,6 +8290,49 @@ SQLITE_PRIVATE const int sqlite3one; # define EIGHT_BYTE_ALIGNMENT(X) ((((char*)(X) - (char*)0)&7)==0) #endif +/* +** Disable MMAP on platforms where it is known to not work +*/ +#if defined(__OpenBSD__) || defined(__QNXNTO__) +# undef SQLITE_MAX_MMAP_SIZE +# define SQLITE_MAX_MMAP_SIZE 0 +#endif + +/* +** Default maximum size of memory used by memory-mapped I/O in the VFS +*/ +#ifdef __APPLE__ +# include +# if TARGET_OS_IPHONE +# undef SQLITE_MAX_MMAP_SIZE +# define SQLITE_MAX_MMAP_SIZE 0 +# endif +#endif +#ifndef SQLITE_MAX_MMAP_SIZE +# if defined(__linux__) \ + || defined(_WIN32) \ + || (defined(__APPLE__) && defined(__MACH__)) \ + || defined(__sun) +# define SQLITE_MAX_MMAP_SIZE 0x7fff0000 /* 2147418112 */ +# else +# define SQLITE_MAX_MMAP_SIZE 0 +# endif +# define SQLITE_MAX_MMAP_SIZE_xc 1 /* exclude from ctime.c */ +#endif + +/* +** The default MMAP_SIZE is zero on all platforms. Or, even if a larger +** default MMAP_SIZE is specified at compile-time, make sure that it does +** not exceed the maximum mmap size. 
+*/ +#ifndef SQLITE_DEFAULT_MMAP_SIZE +# define SQLITE_DEFAULT_MMAP_SIZE 0 +# define SQLITE_DEFAULT_MMAP_SIZE_xc 1 /* Exclude from ctime.c */ +#endif +#if SQLITE_DEFAULT_MMAP_SIZE>SQLITE_MAX_MMAP_SIZE +# undef SQLITE_DEFAULT_MMAP_SIZE +# define SQLITE_DEFAULT_MMAP_SIZE SQLITE_MAX_MMAP_SIZE +#endif /* ** An instance of the following structure is used to store the busy-handler @@ -8442,6 +8554,7 @@ SQLITE_PRIVATE int sqlite3BtreeOpen( SQLITE_PRIVATE int sqlite3BtreeClose(Btree*); SQLITE_PRIVATE int sqlite3BtreeSetCacheSize(Btree*,int); +SQLITE_PRIVATE int sqlite3BtreeSetMmapLimit(Btree*,sqlite3_int64); SQLITE_PRIVATE int sqlite3BtreeSetSafetyLevel(Btree*,int,int,int); SQLITE_PRIVATE int sqlite3BtreeSyncDisabled(Btree*); SQLITE_PRIVATE int sqlite3BtreeSetPageSize(Btree *p, int nPagesize, int nReserve, int eFix); @@ -8518,6 +8631,7 @@ SQLITE_PRIVATE int sqlite3BtreeNewDb(Btree *p); #define BTREE_TEXT_ENCODING 5 #define BTREE_USER_VERSION 6 #define BTREE_INCR_VACUUM 7 +#define BTREE_APPLICATION_ID 8 /* ** Values that may be OR'd together to form the second argument of an @@ -9142,6 +9256,12 @@ typedef struct PgHdr DbPage; #define PAGER_JOURNALMODE_MEMORY 4 /* In-memory journal file */ #define PAGER_JOURNALMODE_WAL 5 /* Use write-ahead logging */ +/* +** Flags that make up the mask passed to sqlite3PagerAcquire(). +*/ +#define PAGER_ACQUIRE_NOCONTENT 0x01 /* Do not load data from disk */ +#define PAGER_ACQUIRE_READONLY 0x02 /* Read-only page is acceptable */ + /* ** The remainder of this file contains the declarations of the functions ** that make up the Pager sub-system API. See source code comments for @@ -9166,6 +9286,7 @@ SQLITE_PRIVATE void sqlite3PagerSetBusyhandler(Pager*, int(*)(void *), void *); SQLITE_PRIVATE int sqlite3PagerSetPagesize(Pager*, u32*, int); SQLITE_PRIVATE int sqlite3PagerMaxPageCount(Pager*, int); SQLITE_PRIVATE void sqlite3PagerSetCachesize(Pager*, int); +SQLITE_PRIVATE void sqlite3PagerSetMmapLimit(Pager *, sqlite3_int64); SQLITE_PRIVATE void sqlite3PagerShrink(Pager*); SQLITE_PRIVATE void sqlite3PagerSetSafetyLevel(Pager*,int,int,int); SQLITE_PRIVATE int sqlite3PagerLockingMode(Pager *, int); @@ -9312,6 +9433,8 @@ struct PgHdr { #define PGHDR_REUSE_UNLIKELY 0x010 /* A hint that reuse is unlikely */ #define PGHDR_DONT_WRITE 0x020 /* Do not write content to disk */ +#define PGHDR_MMAP 0x040 /* This is an mmap page object */ + /* Initialize and shutdown the page cache subsystem */ SQLITE_PRIVATE int sqlite3PcacheInitialize(void); SQLITE_PRIVATE void sqlite3PcacheShutdown(void); @@ -9523,14 +9646,6 @@ SQLITE_PRIVATE void sqlite3PCacheSetDefault(void); # define SQLITE_OS_WINRT 0 #endif -/* -** When compiled for WinCE or WinRT, there is no concept of the current -** directory. - */ -#if !SQLITE_OS_WINCE && !SQLITE_OS_WINRT -# define SQLITE_CURDIR 1 -#endif - /* If the SET_FULLSYNC macro is not defined above, then make it ** a no-op */ @@ -9683,6 +9798,8 @@ SQLITE_PRIVATE int sqlite3OsShmMap(sqlite3_file *,int,int,int,void volatile **); SQLITE_PRIVATE int sqlite3OsShmLock(sqlite3_file *id, int, int, int); SQLITE_PRIVATE void sqlite3OsShmBarrier(sqlite3_file *id); SQLITE_PRIVATE int sqlite3OsShmUnmap(sqlite3_file *id, int); +SQLITE_PRIVATE int sqlite3OsFetch(sqlite3_file *id, i64, int, void **); +SQLITE_PRIVATE int sqlite3OsUnfetch(sqlite3_file *, i64, void *); /* @@ -9922,6 +10039,7 @@ struct sqlite3 { int nDb; /* Number of backends currently in use */ int flags; /* Miscellaneous flags. 
See below */ i64 lastRowid; /* ROWID of most recent insert (see above) */ + i64 szMmap; /* Default mmap_size setting */ unsigned int openFlags; /* Flags passed to sqlite3_vfs.xOpen() */ int errCode; /* Most recent error code (SQLITE_*) */ int errMask; /* & result codes with this before returning */ @@ -11158,6 +11276,8 @@ struct NameContext { #define NC_HasAgg 0x02 /* One or more aggregate functions seen */ #define NC_IsCheck 0x04 /* True if resolving names in a CHECK constraint */ #define NC_InAggFunc 0x08 /* True if analyzing arguments to an agg func */ +#define NC_AsMaybe 0x10 /* Resolve to AS terms of the result set only + ** if no other resolution is available */ /* ** An instance of the following structure contains all information @@ -11593,6 +11713,8 @@ struct Sqlite3Config { void *pHeap; /* Heap storage space */ int nHeap; /* Size of pHeap[] */ int mnReq, mxReq; /* Min and max heap requests sizes */ + sqlite3_int64 szMmap; /* mmap() space per open file */ + sqlite3_int64 mxMmap; /* Maximum value for szMmap */ void *pScratch; /* Scratch memory */ int szScratch; /* Size of each scratch buffer */ int nScratch; /* Number of scratch buffers */ @@ -11627,6 +11749,7 @@ struct Walker { int (*xSelectCallback)(Walker*,Select*); /* Callback for SELECTs */ Parse *pParse; /* Parser context. */ int walkerDepth; /* Number of subqueries */ + u8 bSelectDepthFirst; /* Do subqueries first */ union { /* Extra data for callback */ NameContext *pNC; /* Naming context */ int i; /* Integer value */ @@ -12130,6 +12253,12 @@ SQLITE_PRIVATE void sqlite3Error(sqlite3*, int, const char*,...); SQLITE_PRIVATE void *sqlite3HexToBlob(sqlite3*, const char *z, int n); SQLITE_PRIVATE u8 sqlite3HexToInt(int h); SQLITE_PRIVATE int sqlite3TwoPartName(Parse *, Token *, Token *, Token **); + +#if defined(SQLITE_DEBUG) || defined(SQLITE_TEST) || \ + defined(SQLITE_DEBUG_OS_TRACE) +SQLITE_PRIVATE const char *sqlite3ErrName(int); +#endif + SQLITE_PRIVATE const char *sqlite3ErrStr(int); SQLITE_PRIVATE int sqlite3ReadSchema(Parse *pParse); SQLITE_PRIVATE CollSeq *sqlite3FindCollSeq(sqlite3*,u8 enc, const char*,int); @@ -12614,6 +12743,8 @@ SQLITE_PRIVATE SQLITE_WSD struct Sqlite3Config sqlite3Config = { (void*)0, /* pHeap */ 0, /* nHeap */ 0, 0, /* mnHeap, mxHeap */ + SQLITE_DEFAULT_MMAP_SIZE, /* szMmap */ + SQLITE_MAX_MMAP_SIZE, /* mxMmap */ (void*)0, /* pScratch */ 0, /* szScratch */ 0, /* nScratch */ @@ -12737,15 +12868,15 @@ static const char * const azCompileOpt[] = { #ifdef SQLITE_COVERAGE_TEST "COVERAGE_TEST", #endif -#ifdef SQLITE_CURDIR - "CURDIR", -#endif #ifdef SQLITE_DEBUG "DEBUG", #endif #ifdef SQLITE_DEFAULT_LOCKING_MODE "DEFAULT_LOCKING_MODE=" CTIMEOPT_VAL(SQLITE_DEFAULT_LOCKING_MODE), #endif +#if defined(SQLITE_DEFAULT_MMAP_SIZE) && !defined(SQLITE_DEFAULT_MMAP_SIZE_xc) + "DEFAULT_MMAP_SIZE=" CTIMEOPT_VAL(SQLITE_DEFAULT_MMAP_SIZE), +#endif #ifdef SQLITE_DISABLE_DIRSYNC "DISABLE_DIRSYNC", #endif @@ -12836,6 +12967,9 @@ static const char * const azCompileOpt[] = { #ifdef SQLITE_LOCK_TRACE "LOCK_TRACE", #endif +#if defined(SQLITE_MAX_MMAP_SIZE) && !defined(SQLITE_MAX_MMAP_SIZE_xc) + "MAX_MMAP_SIZE=" CTIMEOPT_VAL(SQLITE_MAX_MMAP_SIZE), +#endif #ifdef SQLITE_MAX_SCHEMA_RETRY "MAX_SCHEMA_RETRY=" CTIMEOPT_VAL(SQLITE_MAX_SCHEMA_RETRY), #endif @@ -12893,11 +13027,6 @@ static const char * const azCompileOpt[] = { #ifdef SQLITE_OMIT_CHECK "OMIT_CHECK", #endif -/* // redundant -** #ifdef SQLITE_OMIT_COMPILEOPTION_DIAGS -** "OMIT_COMPILEOPTION_DIAGS", -** #endif -*/ #ifdef SQLITE_OMIT_COMPLETE "OMIT_COMPLETE", #endif @@ 
-13039,13 +13168,13 @@ static const char * const azCompileOpt[] = { #ifdef SQLITE_TCL "TCL", #endif -#ifdef SQLITE_TEMP_STORE +#if defined(SQLITE_TEMP_STORE) && !defined(SQLITE_TEMP_STORE_xc) "TEMP_STORE=" CTIMEOPT_VAL(SQLITE_TEMP_STORE), #endif #ifdef SQLITE_TEST "TEST", #endif -#ifdef SQLITE_THREADSAFE +#if defined(SQLITE_THREADSAFE) "THREADSAFE=" CTIMEOPT_VAL(SQLITE_THREADSAFE), #endif #ifdef SQLITE_USE_ALLOCA @@ -13071,8 +13200,11 @@ SQLITE_API int sqlite3_compileoption_used(const char *zOptName){ /* Since ArraySize(azCompileOpt) is normally in single digits, a ** linear search is adequate. No need for a binary search. */ for(i=0; ipMethods->xShmMap(id, iPage, pgsz, bExtend, pp); } +#if SQLITE_MAX_MMAP_SIZE>0 +/* The real implementation of xFetch and xUnfetch */ +SQLITE_PRIVATE int sqlite3OsFetch(sqlite3_file *id, i64 iOff, int iAmt, void **pp){ + DO_OS_MALLOC_TEST(id); + return id->pMethods->xFetch(id, iOff, iAmt, pp); +} +SQLITE_PRIVATE int sqlite3OsUnfetch(sqlite3_file *id, i64 iOff, void *p){ + return id->pMethods->xUnfetch(id, iOff, p); +} +#else +/* No-op stubs to use when memory-mapped I/O is disabled */ +SQLITE_PRIVATE int sqlite3OsFetch(sqlite3_file *id, i64 iOff, int iAmt, void **pp){ + *pp = 0; + return SQLITE_OK; +} +SQLITE_PRIVATE int sqlite3OsUnfetch(sqlite3_file *id, i64 iOff, void *p){ + return SQLITE_OK; +} +#endif + /* ** The next group of routines are convenience wrappers around the ** VFS methods. @@ -22851,7 +23011,7 @@ SQLITE_PRIVATE const char *sqlite3OpcodeName(int i){ /* #include */ #include #include -#ifndef SQLITE_OMIT_WAL +#if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 #include #endif @@ -22950,6 +23110,11 @@ struct unixFile { const char *zPath; /* Name of the file */ unixShm *pShm; /* Shared memory segment information */ int szChunk; /* Configured by FCNTL_CHUNK_SIZE */ + int nFetchOut; /* Number of outstanding xFetch refs */ + sqlite3_int64 mmapSize; /* Usable size of mapping at pMapRegion */ + sqlite3_int64 mmapSizeActual; /* Actual size of mapping at pMapRegion */ + sqlite3_int64 mmapSizeMax; /* Configured FCNTL_MMAP_SIZE value */ + void *pMapRegion; /* Memory mapped region */ #ifdef __QNXNTO__ int sectorSize; /* Device sector size */ int deviceCharacteristics; /* Precomputed device characteristics */ @@ -22974,7 +23139,9 @@ struct unixFile { unsigned char transCntrChng; /* True if the transaction counter changed */ unsigned char dbUpdate; /* True if any part of database file changed */ unsigned char inNormalWrite; /* True if in a normal write operation */ + #endif + #ifdef SQLITE_TEST /* In test mode, increase the size of this structure a bit so that ** it is larger than the struct CrashFile defined in test6.c. @@ -22998,6 +23165,7 @@ struct unixFile { #define UNIXFILE_DELETE 0x20 /* Delete on close */ #define UNIXFILE_URI 0x40 /* Filename might have query parameters */ #define UNIXFILE_NOLOCK 0x80 /* Do no file locking */ +#define UNIXFILE_WARNED 0x0100 /* verifyDbFile() warnings have been issued */ /* ** Include code that is common to all os_*.c files @@ -23239,6 +23407,17 @@ SQLITE_API int sqlite3_open_file_count = 0; #define threadid 0 #endif +/* +** HAVE_MREMAP defaults to true on Linux and false everywhere else. +*/ +#if !defined(HAVE_MREMAP) +# if defined(__linux__) && defined(_GNU_SOURCE) +# define HAVE_MREMAP 1 +# else +# define HAVE_MREMAP 0 +# endif +#endif + /* ** Different Unix systems declare open() in different ways. Same use ** open(const char*,int,mode_t). Others use open(const char*,int,...). 
@@ -23263,9 +23442,6 @@ static int posixFchown(int fd, uid_t uid, gid_t gid){ /* Forward reference */ static int openDirectory(const char*, int*); -/* Fix for "error: 'fchmod' undeclared here (not in a function)" on FreeBSD 9 */ -int fchmod(int, mode_t); - /* ** Many system calls are accessed through pointer-to-functions so that ** they may be overridden at runtime to facilitate fault injection during @@ -23373,6 +23549,19 @@ static struct unix_syscall { { "fchown", (sqlite3_syscall_ptr)posixFchown, 0 }, #define osFchown ((int(*)(int,uid_t,gid_t))aSyscall[20].pCurrent) + { "mmap", (sqlite3_syscall_ptr)mmap, 0 }, +#define osMmap ((void*(*)(void*,size_t,int,int,int,off_t))aSyscall[21].pCurrent) + + { "munmap", (sqlite3_syscall_ptr)munmap, 0 }, +#define osMunmap ((void*(*)(void*,size_t))aSyscall[22].pCurrent) + +#if HAVE_MREMAP + { "mremap", (sqlite3_syscall_ptr)mremap, 0 }, +#else + { "mremap", (sqlite3_syscall_ptr)0, 0 }, +#endif +#define osMremap ((void*(*)(void*,size_t,size_t,int,...))aSyscall[23].pCurrent) + }; /* End of the overrideable system calls */ /* @@ -23704,7 +23893,6 @@ static int sqliteErrorFromPosixError(int posixError, int sqliteIOErr) { } - /****************************************************************************** ****************** Begin Unique File ID Utility Used By VxWorks *************** ** @@ -24040,7 +24228,6 @@ static int unixLogErrorAtLine( zErr = strerror(iErrno); #endif - assert( errcode!=SQLITE_OK ); if( zPath==0 ) zPath = ""; sqlite3_log(errcode, "os_unix.c:%d: (%d) %s(%s) - %s", @@ -24206,6 +24393,50 @@ static int findInodeInfo( } +/* +** Check a unixFile that is a database. Verify the following: +** +** (1) There is exactly one hard link on the file +** (2) The file is not a symbolic link +** (3) The file has not been renamed or unlinked +** +** Issue sqlite3_log(SQLITE_WARNING,...) messages if anything is not right. +*/ +static void verifyDbFile(unixFile *pFile){ + struct stat buf; + int rc; + if( pFile->ctrlFlags & UNIXFILE_WARNED ){ + /* One or more of the following warnings have already been issued. Do not + ** repeat them so as not to clutter the error log */ + return; + } + rc = osFstat(pFile->h, &buf); + if( rc!=0 ){ + sqlite3_log(SQLITE_WARNING, "cannot fstat db file %s", pFile->zPath); + pFile->ctrlFlags |= UNIXFILE_WARNED; + return; + } + if( buf.st_nlink==0 && (pFile->ctrlFlags & UNIXFILE_DELETE)==0 ){ + sqlite3_log(SQLITE_WARNING, "file unlinked while open: %s", pFile->zPath); + pFile->ctrlFlags |= UNIXFILE_WARNED; + return; + } + if( buf.st_nlink>1 ){ + sqlite3_log(SQLITE_WARNING, "multiple links to file: %s", pFile->zPath); + pFile->ctrlFlags |= UNIXFILE_WARNED; + return; + } + if( pFile->pInode!=0 + && ((rc = osStat(pFile->zPath, &buf))!=0 + || buf.st_ino!=pFile->pInode->fileId.ino) + ){ + sqlite3_log(SQLITE_WARNING, "file renamed while open: %s", pFile->zPath); + pFile->ctrlFlags |= UNIXFILE_WARNED; + return; + } +} + + /* ** This routine checks if there is a RESERVED lock held on the specified ** file by this or any other process. If such a lock is held, set *pResOut @@ -24736,9 +24967,13 @@ end_unlock: ** the requested locking level, this routine is a no-op. */ static int unixUnlock(sqlite3_file *id, int eFileLock){ + assert( eFileLock==SHARED_LOCK || ((unixFile *)id)->nFetchOut==0 ); return posixUnlock(id, eFileLock, 0); } +static int unixMapfile(unixFile *pFd, i64 nByte); +static void unixUnmapfile(unixFile *pFd); + /* ** This function performs the parts of the "close file" operation ** common to all locking schemes. 
It closes the directory and file @@ -24751,6 +24986,7 @@ static int unixUnlock(sqlite3_file *id, int eFileLock){ */ static int closeUnixFile(sqlite3_file *id){ unixFile *pFile = (unixFile*)id; + unixUnmapfile(pFile); if( pFile->h>=0 ){ robust_close(pFile, pFile->h, __LINE__); pFile->h = -1; @@ -24777,6 +25013,7 @@ static int closeUnixFile(sqlite3_file *id){ static int unixClose(sqlite3_file *id){ int rc = SQLITE_OK; unixFile *pFile = (unixFile *)id; + verifyDbFile(pFile); unixUnlock(id, NO_LOCK); unixEnterMutex(); @@ -26008,6 +26245,8 @@ static int unixRead( unixFile *pFile = (unixFile *)id; int got; assert( id ); + assert( offset>=0 ); + assert( amt>0 ); /* If this is a database file (not a journal, master-journal or temp ** file), the bytes in the locking range should never be read or written. */ @@ -26018,6 +26257,23 @@ static int unixRead( ); #endif +#if SQLITE_MAX_MMAP_SIZE>0 + /* Deal with as much of this read request as possible by transfering + ** data from the memory mapping using memcpy(). */ + if( offsetmmapSize ){ + if( offset+amt <= pFile->mmapSize ){ + memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], amt); + return SQLITE_OK; + }else{ + int nCopy = pFile->mmapSize - offset; + memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], nCopy); + pBuf = &((u8 *)pBuf)[nCopy]; + amt -= nCopy; + offset += nCopy; + } + } +#endif + got = seekAndRead(pFile, offset, pBuf, amt); if( got==amt ){ return SQLITE_OK; @@ -26032,6 +26288,51 @@ static int unixRead( } } +/* +** Attempt to seek the file-descriptor passed as the first argument to +** absolute offset iOff, then attempt to write nBuf bytes of data from +** pBuf to it. If an error occurs, return -1 and set *piErrno. Otherwise, +** return the actual number of bytes written (which may be less than +** nBuf). +*/ +static int seekAndWriteFd( + int fd, /* File descriptor to write to */ + i64 iOff, /* File offset to begin writing at */ + const void *pBuf, /* Copy data from this buffer to the file */ + int nBuf, /* Size of buffer pBuf in bytes */ + int *piErrno /* OUT: Error number if error occurs */ +){ + int rc = 0; /* Value returned by system call */ + + assert( nBuf==(nBuf&0x1ffff) ); + nBuf &= 0x1ffff; + TIMER_START; + +#if defined(USE_PREAD) + do{ rc = osPwrite(fd, pBuf, nBuf, iOff); }while( rc<0 && errno==EINTR ); +#elif defined(USE_PREAD64) + do{ rc = osPwrite64(fd, pBuf, nBuf, iOff);}while( rc<0 && errno==EINTR); +#else + do{ + i64 iSeek = lseek(fd, iOff, SEEK_SET); + SimulateIOError( iSeek-- ); + + if( iSeek!=iOff ){ + if( piErrno ) *piErrno = (iSeek==-1 ? errno : 0); + return -1; + } + rc = osWrite(fd, pBuf, nBuf); + }while( rc<0 && errno==EINTR ); +#endif + + TIMER_END; + OSTRACE(("WRITE %-3d %5d %7lld %llu\n", fd, rc, iOff, TIMER_ELAPSED)); + + if( rc<0 && piErrno ) *piErrno = errno; + return rc; +} + + /* ** Seek to the offset in id->offset then read cnt bytes into pBuf. ** Return the number of bytes actually read. Update the offset. @@ -26040,39 +26341,7 @@ static int unixRead( ** is set before returning. 
*/ static int seekAndWrite(unixFile *id, i64 offset, const void *pBuf, int cnt){ - int got; -#if (!defined(USE_PREAD) && !defined(USE_PREAD64)) - i64 newOffset; -#endif - assert( cnt==(cnt&0x1ffff) ); - cnt &= 0x1ffff; - TIMER_START; -#if defined(USE_PREAD) - do{ got = osPwrite(id->h, pBuf, cnt, offset); }while( got<0 && errno==EINTR ); -#elif defined(USE_PREAD64) - do{ got = osPwrite64(id->h, pBuf, cnt, offset);}while( got<0 && errno==EINTR); -#else - do{ - newOffset = lseek(id->h, offset, SEEK_SET); - SimulateIOError( newOffset-- ); - if( newOffset!=offset ){ - if( newOffset == -1 ){ - ((unixFile*)id)->lastErrno = errno; - }else{ - ((unixFile*)id)->lastErrno = 0; - } - return -1; - } - got = osWrite(id->h, pBuf, cnt); - }while( got<0 && errno==EINTR ); -#endif - TIMER_END; - if( got<0 ){ - ((unixFile*)id)->lastErrno = errno; - } - - OSTRACE(("WRITE %-3d %5d %7lld %llu\n", id->h, got, offset, TIMER_ELAPSED)); - return got; + return seekAndWriteFd(id->h, offset, pBuf, cnt, &id->lastErrno); } @@ -26122,6 +26391,23 @@ static int unixWrite( } #endif +#if SQLITE_MAX_MMAP_SIZE>0 + /* Deal with as much of this write request as possible by transfering + ** data from the memory mapping using memcpy(). */ + if( offsetmmapSize ){ + if( offset+amt <= pFile->mmapSize ){ + memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, amt); + return SQLITE_OK; + }else{ + int nCopy = pFile->mmapSize - offset; + memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, nCopy); + pBuf = &((u8 *)pBuf)[nCopy]; + amt -= nCopy; + offset += nCopy; + } + } +#endif + while( amt>0 && (wrote = seekAndWrite(pFile, offset, pBuf, amt))>0 ){ amt -= wrote; offset += wrote; @@ -26404,6 +26690,14 @@ static int unixTruncate(sqlite3_file *id, i64 nByte){ } #endif + /* If the file was just truncated to a size smaller than the currently + ** mapped region, reduce the effective mapping size as well. SQLite will + ** use read() and write() to access data beyond this point from now on. 
+ */ + if( nBytemmapSize ){ + pFile->mmapSize = nByte; + } + return SQLITE_OK; } } @@ -26492,6 +26786,19 @@ static int fcntlSizeHint(unixFile *pFile, i64 nByte){ } } + if( pFile->mmapSizeMax>0 && nByte>pFile->mmapSize ){ + int rc; + if( pFile->szChunk<=0 ){ + if( robust_ftruncate(pFile->h, nByte) ){ + pFile->lastErrno = errno; + return unixLogError(SQLITE_IOERR_TRUNCATE, "ftruncate", pFile->zPath); + } + } + + rc = unixMapfile(pFile, nByte); + return rc; + } + return SQLITE_OK; } @@ -26559,6 +26866,18 @@ static int unixFileControl(sqlite3_file *id, int op, void *pArg){ } return SQLITE_OK; } + case SQLITE_FCNTL_MMAP_SIZE: { + i64 newLimit = *(i64*)pArg; + if( newLimit>sqlite3GlobalConfig.mxMmap ){ + newLimit = sqlite3GlobalConfig.mxMmap; + } + *(i64*)pArg = pFile->mmapSizeMax; + if( newLimit>=0 ){ + pFile->mmapSizeMax = newLimit; + if( newLimitmmapSize ) pFile->mmapSize = newLimit; + } + return SQLITE_OK; + } #ifdef SQLITE_DEBUG /* The pager calls this method to signal that it has done ** a rollback and that the database is therefore unchanged and @@ -26871,7 +27190,7 @@ static void unixShmPurge(unixFile *pFd){ sqlite3_mutex_free(p->mutex); for(i=0; inRegion; i++){ if( p->h>=0 ){ - munmap(p->apRegion[i], p->szRegion); + osMunmap(p->apRegion[i], p->szRegion); }else{ sqlite3_free(p->apRegion[i]); } @@ -27111,24 +27430,32 @@ static int unixShmMap( if( sStat.st_sizeh, sStat.st_size, nByte)!=0 ){ - rc = unixLogError(SQLITE_IOERR_SHMSIZE, "fallocate", - pShmNode->zFilename); + if( !bExtend ){ goto shmpage_out; } -#else - if( robust_ftruncate(pShmNode->h, nByte) ){ - rc = unixLogError(SQLITE_IOERR_SHMSIZE, "ftruncate", - pShmNode->zFilename); - goto shmpage_out; + + /* Alternatively, if bExtend is true, extend the file. Do this by + ** writing a single byte to the end of each (OS) page being + ** allocated or extended. Technically, we need only write to the + ** last page in order to extend the file. But writing to all new + ** pages forces the OS to allocate them immediately, which reduces + ** the chances of SIGBUS while accessing the mapped region later on. + */ + else{ + static const int pgsz = 4096; + int iPg; + + /* Write to the last byte of each newly allocated or extended page */ + assert( (nByte % pgsz)==0 ); + for(iPg=(sStat.st_size/pgsz); iPg<(nByte/pgsz); iPg++){ + if( seekAndWriteFd(pShmNode->h, iPg*pgsz + pgsz-1, "", 1, 0)!=1 ){ + const char *zFile = pShmNode->zFilename; + rc = unixLogError(SQLITE_IOERR_SHMSIZE, "write", zFile); + goto shmpage_out; + } + } } -#endif } } @@ -27144,7 +27471,7 @@ static int unixShmMap( while(pShmNode->nRegion<=iRegion){ void *pMem; if( pShmNode->h>=0 ){ - pMem = mmap(0, szRegion, + pMem = osMmap(0, szRegion, pShmNode->isReadonly ? PROT_READ : PROT_READ|PROT_WRITE, MAP_SHARED, pShmNode->h, szRegion*(i64)pShmNode->nRegion ); @@ -27361,6 +27688,236 @@ static int unixShmUnmap( # define unixShmUnmap 0 #endif /* #ifndef SQLITE_OMIT_WAL */ +/* +** If it is currently memory mapped, unmap file pFd. +*/ +static void unixUnmapfile(unixFile *pFd){ + assert( pFd->nFetchOut==0 ); +#if SQLITE_MAX_MMAP_SIZE>0 + if( pFd->pMapRegion ){ + osMunmap(pFd->pMapRegion, pFd->mmapSizeActual); + pFd->pMapRegion = 0; + pFd->mmapSize = 0; + pFd->mmapSizeActual = 0; + } +#endif +} + +#if SQLITE_MAX_MMAP_SIZE>0 +/* +** Return the system page size. 
+*/ +static int unixGetPagesize(void){ +#if HAVE_MREMAP + return 512; +#elif defined(_BSD_SOURCE) + return getpagesize(); +#else + return (int)sysconf(_SC_PAGESIZE); +#endif +} +#endif /* SQLITE_MAX_MMAP_SIZE>0 */ + +#if SQLITE_MAX_MMAP_SIZE>0 +/* +** Attempt to set the size of the memory mapping maintained by file +** descriptor pFd to nNew bytes. Any existing mapping is discarded. +** +** If successful, this function sets the following variables: +** +** unixFile.pMapRegion +** unixFile.mmapSize +** unixFile.mmapSizeActual +** +** If unsuccessful, an error message is logged via sqlite3_log() and +** the three variables above are zeroed. In this case SQLite should +** continue accessing the database using the xRead() and xWrite() +** methods. +*/ +static void unixRemapfile( + unixFile *pFd, /* File descriptor object */ + i64 nNew /* Required mapping size */ +){ + const char *zErr = "mmap"; + int h = pFd->h; /* File descriptor open on db file */ + u8 *pOrig = (u8 *)pFd->pMapRegion; /* Pointer to current file mapping */ + i64 nOrig = pFd->mmapSizeActual; /* Size of pOrig region in bytes */ + u8 *pNew = 0; /* Location of new mapping */ + int flags = PROT_READ; /* Flags to pass to mmap() */ + + assert( pFd->nFetchOut==0 ); + assert( nNew>pFd->mmapSize ); + assert( nNew<=pFd->mmapSizeMax ); + assert( nNew>0 ); + assert( pFd->mmapSizeActual>=pFd->mmapSize ); + assert( MAP_FAILED!=0 ); + + if( (pFd->ctrlFlags & UNIXFILE_RDONLY)==0 ) flags |= PROT_WRITE; + + if( pOrig ){ + const int szSyspage = unixGetPagesize(); + i64 nReuse = (pFd->mmapSize & ~(szSyspage-1)); + u8 *pReq = &pOrig[nReuse]; + + /* Unmap any pages of the existing mapping that cannot be reused. */ + if( nReuse!=nOrig ){ + osMunmap(pReq, nOrig-nReuse); + } + +#if HAVE_MREMAP + pNew = osMremap(pOrig, nReuse, nNew, MREMAP_MAYMOVE); + zErr = "mremap"; +#else + pNew = osMmap(pReq, nNew-nReuse, flags, MAP_SHARED, h, nReuse); + if( pNew!=MAP_FAILED ){ + if( pNew!=pReq ){ + osMunmap(pNew, nNew - nReuse); + pNew = 0; + }else{ + pNew = pOrig; + } + } +#endif + + /* The attempt to extend the existing mapping failed. Free it. */ + if( pNew==MAP_FAILED || pNew==0 ){ + osMunmap(pOrig, nReuse); + } + } + + /* If pNew is still NULL, try to create an entirely new mapping. */ + if( pNew==0 ){ + pNew = osMmap(0, nNew, flags, MAP_SHARED, h, 0); + } + + if( pNew==MAP_FAILED ){ + pNew = 0; + nNew = 0; + unixLogError(SQLITE_OK, zErr, pFd->zPath); + + /* If the mmap() above failed, assume that all subsequent mmap() calls + ** will probably fail too. Fall back to using xRead/xWrite exclusively + ** in this case. */ + pFd->mmapSizeMax = 0; + } + pFd->pMapRegion = (void *)pNew; + pFd->mmapSize = pFd->mmapSizeActual = nNew; +} +#endif + +/* +** Memory map or remap the file opened by file-descriptor pFd (if the file +** is already mapped, the existing mapping is replaced by the new). Or, if +** there already exists a mapping for this file, and there are still +** outstanding xFetch() references to it, this function is a no-op. +** +** If parameter nByte is non-negative, then it is the requested size of +** the mapping to create. Otherwise, if nByte is less than zero, then the +** requested size is the size of the file on disk. The actual size of the +** created mapping is either the requested size or the value configured +** using SQLITE_FCNTL_MMAP_LIMIT, whichever is smaller. +** +** SQLITE_OK is returned if no error occurs (even if the mapping is not +** recreated as a result of outstanding references) or an SQLite error +** code otherwise. 
+*/ +static int unixMapfile(unixFile *pFd, i64 nByte){ +#if SQLITE_MAX_MMAP_SIZE>0 + i64 nMap = nByte; + int rc; + + assert( nMap>=0 || pFd->nFetchOut==0 ); + if( pFd->nFetchOut>0 ) return SQLITE_OK; + + if( nMap<0 ){ + struct stat statbuf; /* Low-level file information */ + rc = osFstat(pFd->h, &statbuf); + if( rc!=SQLITE_OK ){ + return SQLITE_IOERR_FSTAT; + } + nMap = statbuf.st_size; + } + if( nMap>pFd->mmapSizeMax ){ + nMap = pFd->mmapSizeMax; + } + + if( nMap!=pFd->mmapSize ){ + if( nMap>0 ){ + unixRemapfile(pFd, nMap); + }else{ + unixUnmapfile(pFd); + } + } +#endif + + return SQLITE_OK; +} + +/* +** If possible, return a pointer to a mapping of file fd starting at offset +** iOff. The mapping must be valid for at least nAmt bytes. +** +** If such a pointer can be obtained, store it in *pp and return SQLITE_OK. +** Or, if one cannot but no error occurs, set *pp to 0 and return SQLITE_OK. +** Finally, if an error does occur, return an SQLite error code. The final +** value of *pp is undefined in this case. +** +** If this function does return a pointer, the caller must eventually +** release the reference by calling unixUnfetch(). +*/ +static int unixFetch(sqlite3_file *fd, i64 iOff, int nAmt, void **pp){ +#if SQLITE_MAX_MMAP_SIZE>0 + unixFile *pFd = (unixFile *)fd; /* The underlying database file */ +#endif + *pp = 0; + +#if SQLITE_MAX_MMAP_SIZE>0 + if( pFd->mmapSizeMax>0 ){ + if( pFd->pMapRegion==0 ){ + int rc = unixMapfile(pFd, -1); + if( rc!=SQLITE_OK ) return rc; + } + if( pFd->mmapSize >= iOff+nAmt ){ + *pp = &((u8 *)pFd->pMapRegion)[iOff]; + pFd->nFetchOut++; + } + } +#endif + return SQLITE_OK; +} + +/* +** If the third argument is non-NULL, then this function releases a +** reference obtained by an earlier call to unixFetch(). The second +** argument passed to this function must be the same as the corresponding +** argument that was passed to the unixFetch() invocation. +** +** Or, if the third argument is NULL, then this function is being called +** to inform the VFS layer that, according to POSIX, any existing mapping +** may now be invalid and should be unmapped. +*/ +static int unixUnfetch(sqlite3_file *fd, i64 iOff, void *p){ + unixFile *pFd = (unixFile *)fd; /* The underlying database file */ + UNUSED_PARAMETER(iOff); + + /* If p==0 (unmap the entire file) then there must be no outstanding + ** xFetch references. Or, if p!=0 (meaning it is an xFetch reference), + ** then there must be at least one outstanding. */ + assert( (p==0)==(pFd->nFetchOut==0) ); + + /* If p!=0, it must match the iOff value. */ + assert( p==0 || p==&((u8 *)pFd->pMapRegion)[iOff] ); + + if( p ){ + pFd->nFetchOut--; + }else{ + unixUnmapfile(pFd); + } + + assert( pFd->nFetchOut>=0 ); + return SQLITE_OK; +} + /* ** Here ends the implementation of all sqlite3_file methods. 
** @@ -27419,7 +27976,9 @@ static const sqlite3_io_methods METHOD = { \ unixShmMap, /* xShmMap */ \ unixShmLock, /* xShmLock */ \ unixShmBarrier, /* xShmBarrier */ \ - unixShmUnmap /* xShmUnmap */ \ + unixShmUnmap, /* xShmUnmap */ \ + unixFetch, /* xFetch */ \ + unixUnfetch, /* xUnfetch */ \ }; \ static const sqlite3_io_methods *FINDER##Impl(const char *z, unixFile *p){ \ UNUSED_PARAMETER(z); UNUSED_PARAMETER(p); \ @@ -27436,7 +27995,7 @@ static const sqlite3_io_methods *(*const FINDER)(const char*,unixFile *p) \ IOMETHODS( posixIoFinder, /* Finder function name */ posixIoMethods, /* sqlite3_io_methods object name */ - 2, /* shared memory is enabled */ + 3, /* shared memory and mmap are enabled */ unixClose, /* xClose method */ unixLock, /* xLock method */ unixUnlock, /* xUnlock method */ @@ -27687,6 +28246,7 @@ static int fillInUnixFile( pNew->pVfs = pVfs; pNew->zPath = zFilename; pNew->ctrlFlags = (u8)ctrlFlags; + pNew->mmapSizeMax = sqlite3GlobalConfig.szMmap; if( sqlite3_uri_boolean(((ctrlFlags & UNIXFILE_URI) ? zFilename : 0), "psow", SQLITE_POWERSAFE_OVERWRITE) ){ pNew->ctrlFlags |= UNIXFILE_PSOW; @@ -27822,15 +28382,15 @@ static int fillInUnixFile( if( h>=0 ) robust_close(pNew, h, __LINE__); h = -1; osUnlink(zFilename); - isDelete = 0; + pNew->ctrlFlags |= UNIXFILE_DELETE; } - if( isDelete ) pNew->ctrlFlags |= UNIXFILE_DELETE; #endif if( rc!=SQLITE_OK ){ if( h>=0 ) robust_close(pNew, h, __LINE__); }else{ pNew->pMethod = pLockingStyle; OpenCounter(+1); + verifyDbFile(pNew); } return rc; } @@ -29924,7 +30484,7 @@ SQLITE_API int sqlite3_os_init(void){ /* Double-check that the aSyscall[] array has been constructed ** correctly. See ticket [bb3a86e890c8e96ab] */ - assert( ArraySize(aSyscall)==21 ); + assert( ArraySize(aSyscall)==24 ); /* Register all VFSes defined in the aVfs[] array */ for(i=0; i<(sizeof(aVfs)/sizeof(sqlite3_vfs)); i++){ @@ -30307,11 +30867,20 @@ struct winFile { winceLock local; /* Locks obtained by this instance of winFile */ winceLock *shared; /* Global shared lock memory for the file */ #endif +#if SQLITE_MAX_MMAP_SIZE>0 + int nFetchOut; /* Number of outstanding xFetch references */ + HANDLE hMap; /* Handle for accessing memory mapping */ + void *pMapRegion; /* Area memory mapped */ + sqlite3_int64 mmapSize; /* Usable size of mapped region */ + sqlite3_int64 mmapSizeActual; /* Actual size of mapped region */ + sqlite3_int64 mmapSizeMax; /* Configured FCNTL_MMAP_SIZE value */ +#endif }; /* ** Allowed values for winFile.ctrlFlags */ +#define WINFILE_RDONLY 0x02 /* Connection is read only */ #define WINFILE_PERSIST_WAL 0x04 /* Persistent WAL mode */ #define WINFILE_PSOW 0x10 /* SQLITE_IOCAP_POWERSAFE_OVERWRITE */ @@ -31671,7 +32240,7 @@ static int getLastErrorMsg(DWORD lastErrno, int nBuf, char *zBuf){ } #endif if( 0 == dwLen ){ - sqlite3_snprintf(nBuf, zBuf, "OsError 0x%x (%u)", lastErrno, lastErrno); + sqlite3_snprintf(nBuf, zBuf, "OsError 0x%lx (%lu)", lastErrno, lastErrno); }else{ /* copy a maximum of nBuf chars to output buffer */ sqlite3_snprintf(nBuf, zBuf, "%s", zOut); @@ -31714,7 +32283,7 @@ static int winLogErrorAtLine( for(i=0; zMsg[i] && zMsg[i]!='\r' && zMsg[i]!='\n'; i++){} zMsg[i] = 0; sqlite3_log(errcode, - "os_win.c:%d: (%d) %s(%s) - %s", + "os_win.c:%d: (%lu) %s(%s) - %s", iLine, lastErrno, zFunc, zPath, zMsg ); @@ -32175,6 +32744,8 @@ static int seekWinFile(winFile *pFile, sqlite3_int64 iOffset){ DWORD dwRet; /* Value returned by SetFilePointer() */ DWORD lastErrno; /* Value returned by GetLastError() */ + OSTRACE(("SEEK file=%p, offset=%lld\n", 
pFile->h, iOffset)); + upperBits = (LONG)((iOffset>>32) & 0x7fffffff); lowerBits = (LONG)(iOffset & 0xffffffff); @@ -32192,9 +32763,11 @@ static int seekWinFile(winFile *pFile, sqlite3_int64 iOffset){ pFile->lastErrno = lastErrno; winLogError(SQLITE_IOERR_SEEK, pFile->lastErrno, "seekWinFile", pFile->zPath); + OSTRACE(("SEEK file=%p, rc=SQLITE_IOERR_SEEK\n", pFile->h)); return 1; } + OSTRACE(("SEEK file=%p, rc=SQLITE_OK\n", pFile->h)); return 0; #else /* @@ -32211,13 +32784,20 @@ static int seekWinFile(winFile *pFile, sqlite3_int64 iOffset){ pFile->lastErrno = osGetLastError(); winLogError(SQLITE_IOERR_SEEK, pFile->lastErrno, "seekWinFile", pFile->zPath); + OSTRACE(("SEEK file=%p, rc=SQLITE_IOERR_SEEK\n", pFile->h)); return 1; } + OSTRACE(("SEEK file=%p, rc=SQLITE_OK\n", pFile->h)); return 0; #endif } +#if SQLITE_MAX_MMAP_SIZE>0 +/* Forward references to VFS methods */ +static int winUnmapfile(winFile*); +#endif + /* ** Close a file. ** @@ -32237,8 +32817,14 @@ static int winClose(sqlite3_file *id){ #ifndef SQLITE_OMIT_WAL assert( pFile->pShm==0 ); #endif - OSTRACE(("CLOSE %d\n", pFile->h)); assert( pFile->h!=NULL && pFile->h!=INVALID_HANDLE_VALUE ); + OSTRACE(("CLOSE file=%p\n", pFile->h)); + +#if SQLITE_MAX_MMAP_SIZE>0 + rc = winUnmapfile(pFile); + if( rc!=SQLITE_OK ) return rc; +#endif + do{ rc = osCloseHandle(pFile->h); /* SimulateIOError( rc=0; cnt=MX_CLOSE_ATTEMPT; ); */ @@ -32258,11 +32844,11 @@ static int winClose(sqlite3_file *id){ sqlite3_free(pFile->zDeleteOnClose); } #endif - OSTRACE(("CLOSE %d %s\n", pFile->h, rc ? "ok" : "failed")); if( rc ){ pFile->h = NULL; } OpenCounter(-1); + OSTRACE(("CLOSE file=%p, rc=%s\n", pFile->h, rc ? "ok" : "failed")); return rc ? SQLITE_OK : winLogError(SQLITE_IOERR_CLOSE, osGetLastError(), "winClose", pFile->zPath); @@ -32287,11 +32873,33 @@ static int winRead( int nRetry = 0; /* Number of retrys */ assert( id!=0 ); + assert( amt>0 ); + assert( offset>=0 ); SimulateIOError(return SQLITE_IOERR_READ); - OSTRACE(("READ %d lock=%d\n", pFile->h, pFile->locktype)); + OSTRACE(("READ file=%p, buffer=%p, amount=%d, offset=%lld, lock=%d\n", + pFile->h, pBuf, amt, offset, pFile->locktype)); + +#if SQLITE_MAX_MMAP_SIZE>0 + /* Deal with as much of this read request as possible by transfering + ** data from the memory mapping using memcpy(). 
*/ + if( offsetmmapSize ){ + if( offset+amt <= pFile->mmapSize ){ + memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], amt); + OSTRACE(("READ-MMAP file=%p, rc=SQLITE_OK\n", pFile->h)); + return SQLITE_OK; + }else{ + int nCopy = (int)(pFile->mmapSize - offset); + memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], nCopy); + pBuf = &((u8 *)pBuf)[nCopy]; + amt -= nCopy; + offset += nCopy; + } + } +#endif #if SQLITE_OS_WINCE if( seekWinFile(pFile, offset) ){ + OSTRACE(("READ file=%p, rc=SQLITE_FULL\n", pFile->h)); return SQLITE_FULL; } while( !osReadFile(pFile->h, pBuf, amt, &nRead, 0) ){ @@ -32305,6 +32913,7 @@ static int winRead( DWORD lastErrno; if( retryIoerr(&nRetry, &lastErrno) ) continue; pFile->lastErrno = lastErrno; + OSTRACE(("READ file=%p, rc=SQLITE_IOERR_READ\n", pFile->h)); return winLogError(SQLITE_IOERR_READ, pFile->lastErrno, "winRead", pFile->zPath); } @@ -32312,9 +32921,11 @@ static int winRead( if( nRead<(DWORD)amt ){ /* Unread parts of the buffer must be zero-filled */ memset(&((char*)pBuf)[nRead], 0, amt-nRead); + OSTRACE(("READ file=%p, rc=SQLITE_IOERR_SHORT_READ\n", pFile->h)); return SQLITE_IOERR_SHORT_READ; } + OSTRACE(("READ file=%p, rc=SQLITE_OK\n", pFile->h)); return SQLITE_OK; } @@ -32337,7 +32948,26 @@ static int winWrite( SimulateIOError(return SQLITE_IOERR_WRITE); SimulateDiskfullError(return SQLITE_FULL); - OSTRACE(("WRITE %d lock=%d\n", pFile->h, pFile->locktype)); + OSTRACE(("WRITE file=%p, buffer=%p, amount=%d, offset=%lld, lock=%d\n", + pFile->h, pBuf, amt, offset, pFile->locktype)); + +#if SQLITE_MAX_MMAP_SIZE>0 + /* Deal with as much of this write request as possible by transfering + ** data from the memory mapping using memcpy(). */ + if( offsetmmapSize ){ + if( offset+amt <= pFile->mmapSize ){ + memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, amt); + OSTRACE(("WRITE-MMAP file=%p, rc=SQLITE_OK\n", pFile->h)); + return SQLITE_OK; + }else{ + int nCopy = (int)(pFile->mmapSize - offset); + memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, nCopy); + pBuf = &((u8 *)pBuf)[nCopy]; + amt -= nCopy; + offset += nCopy; + } + } +#endif #if SQLITE_OS_WINCE rc = seekWinFile(pFile, offset); @@ -32390,13 +33020,16 @@ static int winWrite( if( rc ){ if( ( pFile->lastErrno==ERROR_HANDLE_DISK_FULL ) || ( pFile->lastErrno==ERROR_DISK_FULL )){ + OSTRACE(("WRITE file=%p, rc=SQLITE_FULL\n", pFile->h)); return SQLITE_FULL; } + OSTRACE(("WRITE file=%p, rc=SQLITE_IOERR_WRITE\n", pFile->h)); return winLogError(SQLITE_IOERR_WRITE, pFile->lastErrno, "winWrite", pFile->zPath); }else{ logIoerr(nRetry); } + OSTRACE(("WRITE file=%p, rc=SQLITE_OK\n", pFile->h)); return SQLITE_OK; } @@ -32406,11 +33039,12 @@ static int winWrite( static int winTruncate(sqlite3_file *id, sqlite3_int64 nByte){ winFile *pFile = (winFile*)id; /* File handle object */ int rc = SQLITE_OK; /* Return code for this function */ + DWORD lastErrno; assert( pFile ); - - OSTRACE(("TRUNCATE %d %lld\n", pFile->h, nByte)); SimulateIOError(return SQLITE_IOERR_TRUNCATE); + OSTRACE(("TRUNCATE file=%p, size=%lld, lock=%d\n", + pFile->h, nByte, pFile->locktype)); /* If the user has configured a chunk-size for this file, truncate the ** file so that it consists of an integer number of chunks (i.e. the @@ -32424,14 +33058,25 @@ static int winTruncate(sqlite3_file *id, sqlite3_int64 nByte){ /* SetEndOfFile() returns non-zero when successful, or zero when it fails. 
 */
   if( seekWinFile(pFile, nByte) ){
     rc = winLogError(SQLITE_IOERR_TRUNCATE, pFile->lastErrno,
-                    "winTruncate1", pFile->zPath);
-  }else if( 0==osSetEndOfFile(pFile->h) ){
-    pFile->lastErrno = osGetLastError();
+                     "winTruncate1", pFile->zPath);
+  }else if( 0==osSetEndOfFile(pFile->h) &&
+            ((lastErrno = osGetLastError())!=ERROR_USER_MAPPED_FILE) ){
+    pFile->lastErrno = lastErrno;
     rc = winLogError(SQLITE_IOERR_TRUNCATE, pFile->lastErrno,
-                    "winTruncate2", pFile->zPath);
+                     "winTruncate2", pFile->zPath);
   }
-  OSTRACE(("TRUNCATE %d %lld %s\n", pFile->h, nByte, rc ? "failed" : "ok"));
+#if SQLITE_MAX_MMAP_SIZE>0
+  /* If the file was truncated to a size smaller than the currently
+  ** mapped region, reduce the effective mapping size as well. SQLite will
+  ** use read() and write() to access data beyond this point from now on.
+  */
+  if( pFile->pMapRegion && nByte<pFile->mmapSize ){
+    pFile->mmapSize = nByte;
+  }
+#endif
+
+  OSTRACE(("TRUNCATE file=%p, rc=%s\n", pFile->h, sqlite3ErrName(rc)));
   return rc;
 }
@@ -32471,13 +33116,14 @@ static int winSync(sqlite3_file *id, int flags){
       || (flags&0x0F)==SQLITE_SYNC_FULL
   );
-  OSTRACE(("SYNC %d lock=%d\n", pFile->h, pFile->locktype));
-
   /* Unix cannot, but some systems may return SQLITE_FULL from here. This
   ** line is to test that doing so does not cause any problems.
   */
   SimulateDiskfullError( return SQLITE_FULL );
+  OSTRACE(("SYNC file=%p, flags=%x, lock=%d\n",
+           pFile->h, flags, pFile->locktype));
+
 #ifndef SQLITE_TEST
   UNUSED_PARAMETER(flags);
 #else
@@ -32496,9 +33142,11 @@ static int winSync(sqlite3_file *id, int flags){
   rc = osFlushFileBuffers(pFile->h);
   SimulateIOError( rc=FALSE );
   if( rc ){
+    OSTRACE(("SYNC file=%p, rc=SQLITE_OK\n", pFile->h));
     return SQLITE_OK;
   }else{
     pFile->lastErrno = osGetLastError();
+    OSTRACE(("SYNC file=%p, rc=SQLITE_IOERR_FSYNC\n", pFile->h));
     return winLogError(SQLITE_IOERR_FSYNC, pFile->lastErrno,
              "winSync", pFile->zPath);
   }
@@ -32513,7 +33161,10 @@ static int winFileSize(sqlite3_file *id, sqlite3_int64 *pSize){
   int rc = SQLITE_OK;
   assert( id!=0 );
+  assert( pSize!=0 );
   SimulateIOError(return SQLITE_IOERR_FSTAT);
+  OSTRACE(("SIZE file=%p, pSize=%p\n", pFile->h, pSize));
+
 #if SQLITE_OS_WINRT
   {
     FILE_STANDARD_INFO info;
@@ -32542,6 +33193,8 @@ static int winFileSize(sqlite3_file *id, sqlite3_int64 *pSize){
     }
   }
 #endif
+  OSTRACE(("SIZE file=%p, pSize=%p, *pSize=%lld, rc=%s\n",
+           pFile->h, pSize, *pSize, sqlite3ErrName(rc)));
   return rc;
 }
@@ -32583,6 +33236,7 @@ static int winFileSize(sqlite3_file *id, sqlite3_int64 *pSize){
 */
 static int getReadLock(winFile *pFile){
   int res;
+  OSTRACE(("READ-LOCK file=%p, lock=%d\n", pFile->h, pFile->locktype));
   if( isNT() ){
 #if SQLITE_OS_WINCE
     /*
@@ -32608,6 +33262,7 @@ static int getReadLock(winFile *pFile){
     pFile->lastErrno = osGetLastError();
     /* No need to log a failure to lock */
   }
+  OSTRACE(("READ-LOCK file=%p, rc=%s\n", pFile->h, sqlite3ErrName(res)));
   return res;
 }
@@ -32617,6 +33272,7 @@ static int getReadLock(winFile *pFile){
 static int unlockReadLock(winFile *pFile){
   int res;
   DWORD lastErrno;
+  OSTRACE(("READ-UNLOCK file=%p, lock=%d\n", pFile->h, pFile->locktype));
   if( isNT() ){
     res = winUnlockFile(&pFile->h, SHARED_FIRST, 0, SHARED_SIZE, 0);
   }
@@ -32630,6 +33286,7 @@ static int unlockReadLock(winFile *pFile){
     winLogError(SQLITE_IOERR_UNLOCK, pFile->lastErrno,
             "unlockReadLock", pFile->zPath);
   }
+  OSTRACE(("READ-UNLOCK file=%p, rc=%s\n", pFile->h, sqlite3ErrName(res)));
   return res;
 }
@@ -32668,14 +33325,15 @@ static int winLock(sqlite3_file *id, int locktype){
   DWORD lastErrno = NO_ERROR;
   assert( 
id!=0 ); - OSTRACE(("LOCK %d %d was %d(%d)\n", - pFile->h, locktype, pFile->locktype, pFile->sharedLockByte)); + OSTRACE(("LOCK file=%p, oldLock=%d(%d), newLock=%d\n", + pFile->h, pFile->locktype, pFile->sharedLockByte, locktype)); /* If there is already a lock of this type or more restrictive on the ** OsFile, do nothing. Don't use the end_lock: exit path, as ** sqlite3OsEnterMutex() hasn't been called yet. */ if( pFile->locktype>=locktype ){ + OSTRACE(("LOCK-HELD file=%p, rc=SQLITE_OK\n", pFile->h)); return SQLITE_OK; } @@ -32703,7 +33361,8 @@ static int winLock(sqlite3_file *id, int locktype){ ** If you are using this code as a model for alternative VFSes, do not ** copy this retry logic. It is a hack intended for Windows only. */ - OSTRACE(("could not get a PENDING lock. cnt=%d\n", cnt)); + OSTRACE(("LOCK-PENDING-FAIL file=%p, count=%d, rc=%s\n", + pFile->h, cnt, sqlite3ErrName(res))); if( cnt ) sqlite3_win32_sleep(1); } gotPendingLock = res; @@ -32748,14 +33407,12 @@ static int winLock(sqlite3_file *id, int locktype){ if( locktype==EXCLUSIVE_LOCK && res ){ assert( pFile->locktype>=SHARED_LOCK ); res = unlockReadLock(pFile); - OSTRACE(("unreadlock = %d\n", res)); res = winLockFile(&pFile->h, SQLITE_LOCKFILE_FLAGS, SHARED_FIRST, 0, SHARED_SIZE, 0); if( res ){ newLocktype = EXCLUSIVE_LOCK; }else{ lastErrno = osGetLastError(); - OSTRACE(("error-code = %d\n", lastErrno)); getReadLock(pFile); } } @@ -32773,12 +33430,14 @@ static int winLock(sqlite3_file *id, int locktype){ if( res ){ rc = SQLITE_OK; }else{ - OSTRACE(("LOCK FAILED %d trying for %d but got %d\n", pFile->h, - locktype, newLocktype)); + OSTRACE(("LOCK-FAIL file=%p, wanted=%d, got=%d\n", + pFile->h, locktype, newLocktype)); pFile->lastErrno = lastErrno; rc = SQLITE_BUSY; } pFile->locktype = (u8)newLocktype; + OSTRACE(("LOCK file=%p, lock=%d, rc=%s\n", + pFile->h, pFile->locktype, sqlite3ErrName(rc))); return rc; } @@ -32792,20 +33451,23 @@ static int winCheckReservedLock(sqlite3_file *id, int *pResOut){ winFile *pFile = (winFile*)id; SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); + OSTRACE(("TEST-WR-LOCK file=%p, pResOut=%p\n", pFile->h, pResOut)); assert( id!=0 ); if( pFile->locktype>=RESERVED_LOCK ){ rc = 1; - OSTRACE(("TEST WR-LOCK %d %d (local)\n", pFile->h, rc)); + OSTRACE(("TEST-WR-LOCK file=%p, rc=%d (local)\n", pFile->h, rc)); }else{ rc = winLockFile(&pFile->h, SQLITE_LOCKFILEEX_FLAGS,RESERVED_BYTE, 0, 1, 0); if( rc ){ winUnlockFile(&pFile->h, RESERVED_BYTE, 0, 1, 0); } rc = !rc; - OSTRACE(("TEST WR-LOCK %d %d (remote)\n", pFile->h, rc)); + OSTRACE(("TEST-WR-LOCK file=%p, rc=%d (remote)\n", pFile->h, rc)); } *pResOut = rc; + OSTRACE(("TEST-WR-LOCK file=%p, pResOut=%p, *pResOut=%d, rc=SQLITE_OK\n", + pFile->h, pResOut, *pResOut)); return SQLITE_OK; } @@ -32826,8 +33488,8 @@ static int winUnlock(sqlite3_file *id, int locktype){ int rc = SQLITE_OK; assert( pFile!=0 ); assert( locktype<=SHARED_LOCK ); - OSTRACE(("UNLOCK %d to %d was %d(%d)\n", pFile->h, locktype, - pFile->locktype, pFile->sharedLockByte)); + OSTRACE(("UNLOCK file=%p, oldLock=%d(%d), newLock=%d\n", + pFile->h, pFile->locktype, pFile->sharedLockByte, locktype)); type = pFile->locktype; if( type>=EXCLUSIVE_LOCK ){ winUnlockFile(&pFile->h, SHARED_FIRST, 0, SHARED_SIZE, 0); @@ -32848,6 +33510,8 @@ static int winUnlock(sqlite3_file *id, int locktype){ winUnlockFile(&pFile->h, PENDING_BYTE, 0, 1, 0); } pFile->locktype = (u8)locktype; + OSTRACE(("UNLOCK file=%p, lock=%d, rc=%s\n", + pFile->h, pFile->locktype, sqlite3ErrName(rc))); return rc; } @@ 
-32875,17 +33539,21 @@ static int getTempname(int nBuf, char *zBuf); */ static int winFileControl(sqlite3_file *id, int op, void *pArg){ winFile *pFile = (winFile*)id; + OSTRACE(("FCNTL file=%p, op=%d, pArg=%p\n", pFile->h, op, pArg)); switch( op ){ case SQLITE_FCNTL_LOCKSTATE: { *(int*)pArg = pFile->locktype; + OSTRACE(("FCNTL file=%p, rc=SQLITE_OK\n", pFile->h)); return SQLITE_OK; } case SQLITE_LAST_ERRNO: { *(int*)pArg = (int)pFile->lastErrno; + OSTRACE(("FCNTL file=%p, rc=SQLITE_OK\n", pFile->h)); return SQLITE_OK; } case SQLITE_FCNTL_CHUNK_SIZE: { pFile->szChunk = *(int *)pArg; + OSTRACE(("FCNTL file=%p, rc=SQLITE_OK\n", pFile->h)); return SQLITE_OK; } case SQLITE_FCNTL_SIZE_HINT: { @@ -32900,20 +33568,25 @@ static int winFileControl(sqlite3_file *id, int op, void *pArg){ SimulateIOErrorBenign(0); } } + OSTRACE(("FCNTL file=%p, rc=%s\n", pFile->h, sqlite3ErrName(rc))); return rc; } + OSTRACE(("FCNTL file=%p, rc=SQLITE_OK\n", pFile->h)); return SQLITE_OK; } case SQLITE_FCNTL_PERSIST_WAL: { winModeBit(pFile, WINFILE_PERSIST_WAL, (int*)pArg); + OSTRACE(("FCNTL file=%p, rc=SQLITE_OK\n", pFile->h)); return SQLITE_OK; } case SQLITE_FCNTL_POWERSAFE_OVERWRITE: { winModeBit(pFile, WINFILE_PSOW, (int*)pArg); + OSTRACE(("FCNTL file=%p, rc=SQLITE_OK\n", pFile->h)); return SQLITE_OK; } case SQLITE_FCNTL_VFSNAME: { *(char**)pArg = sqlite3_mprintf("win32"); + OSTRACE(("FCNTL file=%p, rc=SQLITE_OK\n", pFile->h)); return SQLITE_OK; } case SQLITE_FCNTL_WIN32_AV_RETRY: { @@ -32928,6 +33601,7 @@ static int winFileControl(sqlite3_file *id, int op, void *pArg){ }else{ a[1] = win32IoerrRetryDelay; } + OSTRACE(("FCNTL file=%p, rc=SQLITE_OK\n", pFile->h)); return SQLITE_OK; } case SQLITE_FCNTL_TEMPFILENAME: { @@ -32936,9 +33610,23 @@ static int winFileControl(sqlite3_file *id, int op, void *pArg){ getTempname(pFile->pVfs->mxPathname, zTFile); *(char**)pArg = zTFile; } + OSTRACE(("FCNTL file=%p, rc=SQLITE_OK\n", pFile->h)); return SQLITE_OK; } +#if SQLITE_MAX_MMAP_SIZE>0 + case SQLITE_FCNTL_MMAP_SIZE: { + i64 newLimit = *(i64*)pArg; + if( newLimit>sqlite3GlobalConfig.mxMmap ){ + newLimit = sqlite3GlobalConfig.mxMmap; + } + *(i64*)pArg = pFile->mmapSizeMax; + if( newLimit>=0 ) pFile->mmapSizeMax = newLimit; + OSTRACE(("FCNTL file=%p, rc=SQLITE_OK\n", pFile->h)); + return SQLITE_OK; + } +#endif } + OSTRACE(("FCNTL file=%p, rc=SQLITE_NOTFOUND\n", pFile->h)); return SQLITE_NOTFOUND; } @@ -32966,8 +33654,6 @@ static int winDeviceCharacteristics(sqlite3_file *id){ ((p->ctrlFlags & WINFILE_PSOW)?SQLITE_IOCAP_POWERSAFE_OVERWRITE:0); } -#ifndef SQLITE_OMIT_WAL - /* ** Windows will only let you create file view mappings ** on allocation size granularity boundaries. @@ -32976,6 +33662,8 @@ static int winDeviceCharacteristics(sqlite3_file *id){ */ SYSTEM_INFO winSysInfo; +#ifndef SQLITE_OMIT_WAL + /* ** Helper functions to obtain and relinquish the global mutex. The ** global mutex is used to protect the winLockInfo objects used by @@ -33099,6 +33787,9 @@ static int winShmSystemLock( /* Access to the winShmNode object is serialized by the caller */ assert( sqlite3_mutex_held(pFile->mutex) || pFile->nRef==0 ); + OSTRACE(("SHM-LOCK file=%p, lock=%d, offset=%d, size=%d\n", + pFile->hFile.h, lockType, ofst, nByte)); + /* Release/Acquire the system-level lock */ if( lockType==_SHM_UNLCK ){ rc = winUnlockFile(&pFile->hFile.h, ofst, 0, nByte, 0); @@ -33116,11 +33807,9 @@ static int winShmSystemLock( rc = SQLITE_BUSY; } - OSTRACE(("SHM-LOCK %d %s %s 0x%08lx\n", - pFile->hFile.h, - rc==SQLITE_OK ? 
"ok" : "failed", - lockType==_SHM_UNLCK ? "UnlockFileEx" : "LockFileEx", - pFile->lastErrno)); + OSTRACE(("SHM-LOCK file=%p, func=%s, errno=%lu, rc=%s\n", + pFile->hFile.h, (lockType == _SHM_UNLCK) ? "winUnlockFile" : + "winLockFile", pFile->lastErrno, sqlite3ErrName(rc))); return rc; } @@ -33140,6 +33829,8 @@ static void winShmPurge(sqlite3_vfs *pVfs, int deleteFlag){ winShmNode *p; BOOL bRc; assert( winShmMutexHeld() ); + OSTRACE(("SHM-PURGE pid=%lu, deleteFlag=%d\n", + osGetCurrentProcessId(), deleteFlag)); pp = &winShmNodeList; while( (p = *pp)!=0 ){ if( p->nRef==0 ){ @@ -33147,13 +33838,11 @@ static void winShmPurge(sqlite3_vfs *pVfs, int deleteFlag){ if( p->mutex ) sqlite3_mutex_free(p->mutex); for(i=0; inRegion; i++){ bRc = osUnmapViewOfFile(p->aRegion[i].pMap); - OSTRACE(("SHM-PURGE pid-%d unmap region=%d %s\n", - (int)osGetCurrentProcessId(), i, - bRc ? "ok" : "failed")); + OSTRACE(("SHM-PURGE-UNMAP pid=%lu, region=%d, rc=%s\n", + osGetCurrentProcessId(), i, bRc ? "ok" : "failed")); bRc = osCloseHandle(p->aRegion[i].hMap); - OSTRACE(("SHM-PURGE pid-%d close region=%d %s\n", - (int)osGetCurrentProcessId(), i, - bRc ? "ok" : "failed")); + OSTRACE(("SHM-PURGE-CLOSE pid=%lu, region=%d, rc=%s\n", + osGetCurrentProcessId(), i, bRc ? "ok" : "failed")); } if( p->hFile.h!=NULL && p->hFile.h!=INVALID_HANDLE_VALUE ){ SimulateIOErrorBenign(1); @@ -33432,9 +34121,9 @@ static int winShmLock( } } sqlite3_mutex_leave(pShmNode->mutex); - OSTRACE(("SHM-LOCK shmid-%d, pid-%d got %03x,%03x %s\n", - p->id, (int)osGetCurrentProcessId(), p->sharedMask, p->exclMask, - rc ? "failed" : "ok")); + OSTRACE(("SHM-LOCK pid=%lu, id=%d, sharedMask=%03x, exclMask=%03x, rc=%s\n", + osGetCurrentProcessId(), p->id, p->sharedMask, p->exclMask, + sqlite3ErrName(rc))); return rc; } @@ -33555,8 +34244,8 @@ static int winShmMap( NULL, PAGE_READWRITE, 0, nByte, NULL ); #endif - OSTRACE(("SHM-MAP pid-%d create region=%d nbyte=%d %s\n", - (int)osGetCurrentProcessId(), pShmNode->nRegion, nByte, + OSTRACE(("SHM-MAP-CREATE pid=%lu, region=%d, size=%d, rc=%s\n", + osGetCurrentProcessId(), pShmNode->nRegion, nByte, hMap ? "ok" : "failed")); if( hMap ){ int iOffset = pShmNode->nRegion*szRegion; @@ -33570,8 +34259,8 @@ static int winShmMap( 0, iOffset - iOffsetShift, szRegion + iOffsetShift ); #endif - OSTRACE(("SHM-MAP pid-%d map region=%d offset=%d size=%d %s\n", - (int)osGetCurrentProcessId(), pShmNode->nRegion, iOffset, + OSTRACE(("SHM-MAP-MAP pid=%lu, region=%d, offset=%d, size=%d, rc=%s\n", + osGetCurrentProcessId(), pShmNode->nRegion, iOffset, szRegion, pMap ? "ok" : "failed")); } if( !pMap ){ @@ -33608,6 +34297,230 @@ shmpage_out: # define winShmUnmap 0 #endif /* #ifndef SQLITE_OMIT_WAL */ +/* +** Cleans up the mapped region of the specified file, if any. 
+*/ +#if SQLITE_MAX_MMAP_SIZE>0 +static int winUnmapfile(winFile *pFile){ + assert( pFile!=0 ); + OSTRACE(("UNMAP-FILE pid=%lu, pFile=%p, hMap=%p, pMapRegion=%p, " + "mmapSize=%lld, mmapSizeActual=%lld, mmapSizeMax=%lld\n", + osGetCurrentProcessId(), pFile, pFile->hMap, pFile->pMapRegion, + pFile->mmapSize, pFile->mmapSizeActual, pFile->mmapSizeMax)); + if( pFile->pMapRegion ){ + if( !osUnmapViewOfFile(pFile->pMapRegion) ){ + pFile->lastErrno = osGetLastError(); + OSTRACE(("UNMAP-FILE pid=%lu, pFile=%p, pMapRegion=%p, " + "rc=SQLITE_IOERR_MMAP\n", osGetCurrentProcessId(), pFile, + pFile->pMapRegion)); + return winLogError(SQLITE_IOERR_MMAP, pFile->lastErrno, + "winUnmap1", pFile->zPath); + } + pFile->pMapRegion = 0; + pFile->mmapSize = 0; + pFile->mmapSizeActual = 0; + } + if( pFile->hMap!=NULL ){ + if( !osCloseHandle(pFile->hMap) ){ + pFile->lastErrno = osGetLastError(); + OSTRACE(("UNMAP-FILE pid=%lu, pFile=%p, hMap=%p, rc=SQLITE_IOERR_MMAP\n", + osGetCurrentProcessId(), pFile, pFile->hMap)); + return winLogError(SQLITE_IOERR_MMAP, pFile->lastErrno, + "winUnmap2", pFile->zPath); + } + pFile->hMap = NULL; + } + OSTRACE(("UNMAP-FILE pid=%lu, pFile=%p, rc=SQLITE_OK\n", + osGetCurrentProcessId(), pFile)); + return SQLITE_OK; +} + +/* +** Memory map or remap the file opened by file-descriptor pFd (if the file +** is already mapped, the existing mapping is replaced by the new). Or, if +** there already exists a mapping for this file, and there are still +** outstanding xFetch() references to it, this function is a no-op. +** +** If parameter nByte is non-negative, then it is the requested size of +** the mapping to create. Otherwise, if nByte is less than zero, then the +** requested size is the size of the file on disk. The actual size of the +** created mapping is either the requested size or the value configured +** using SQLITE_FCNTL_MMAP_SIZE, whichever is smaller. +** +** SQLITE_OK is returned if no error occurs (even if the mapping is not +** recreated as a result of outstanding references) or an SQLite error +** code otherwise. 
+*/ +static int winMapfile(winFile *pFd, sqlite3_int64 nByte){ + sqlite3_int64 nMap = nByte; + int rc; + + assert( nMap>=0 || pFd->nFetchOut==0 ); + OSTRACE(("MAP-FILE pid=%lu, pFile=%p, size=%lld\n", + osGetCurrentProcessId(), pFd, nByte)); + + if( pFd->nFetchOut>0 ) return SQLITE_OK; + + if( nMap<0 ){ + rc = winFileSize((sqlite3_file*)pFd, &nMap); + if( rc ){ + OSTRACE(("MAP-FILE pid=%lu, pFile=%p, rc=SQLITE_IOERR_FSTAT\n", + osGetCurrentProcessId(), pFd)); + return SQLITE_IOERR_FSTAT; + } + } + if( nMap>pFd->mmapSizeMax ){ + nMap = pFd->mmapSizeMax; + } + nMap &= ~(sqlite3_int64)(winSysInfo.dwPageSize - 1); + + if( nMap==0 && pFd->mmapSize>0 ){ + winUnmapfile(pFd); + } + if( nMap!=pFd->mmapSize ){ + void *pNew = 0; + DWORD protect = PAGE_READONLY; + DWORD flags = FILE_MAP_READ; + + winUnmapfile(pFd); + if( (pFd->ctrlFlags & WINFILE_RDONLY)==0 ){ + protect = PAGE_READWRITE; + flags |= FILE_MAP_WRITE; + } +#if SQLITE_OS_WINRT + pFd->hMap = osCreateFileMappingFromApp(pFd->h, NULL, protect, nMap, NULL); +#elif defined(SQLITE_WIN32_HAS_WIDE) + pFd->hMap = osCreateFileMappingW(pFd->h, NULL, protect, + (DWORD)((nMap>>32) & 0xffffffff), + (DWORD)(nMap & 0xffffffff), NULL); +#elif defined(SQLITE_WIN32_HAS_ANSI) + pFd->hMap = osCreateFileMappingA(pFd->h, NULL, protect, + (DWORD)((nMap>>32) & 0xffffffff), + (DWORD)(nMap & 0xffffffff), NULL); +#endif + if( pFd->hMap==NULL ){ + pFd->lastErrno = osGetLastError(); + rc = winLogError(SQLITE_IOERR_MMAP, pFd->lastErrno, + "winMapfile", pFd->zPath); + /* Log the error, but continue normal operation using xRead/xWrite */ + OSTRACE(("MAP-FILE-CREATE pid=%lu, pFile=%p, rc=SQLITE_IOERR_MMAP\n", + osGetCurrentProcessId(), pFd)); + return SQLITE_OK; + } + assert( (nMap % winSysInfo.dwPageSize)==0 ); +#if SQLITE_OS_WINRT + pNew = osMapViewOfFileFromApp(pFd->hMap, flags, 0, nMap); +#else + assert( sizeof(SIZE_T)==sizeof(sqlite3_int64) || nMap<=0xffffffff ); + pNew = osMapViewOfFile(pFd->hMap, flags, 0, 0, (SIZE_T)nMap); +#endif + if( pNew==NULL ){ + osCloseHandle(pFd->hMap); + pFd->hMap = NULL; + pFd->lastErrno = osGetLastError(); + winLogError(SQLITE_IOERR_MMAP, pFd->lastErrno, + "winMapfile", pFd->zPath); + OSTRACE(("MAP-FILE-MAP pid=%lu, pFile=%p, rc=SQLITE_IOERR_MMAP\n", + osGetCurrentProcessId(), pFd)); + return SQLITE_OK; + } + pFd->pMapRegion = pNew; + pFd->mmapSize = nMap; + pFd->mmapSizeActual = nMap; + } + + OSTRACE(("MAP-FILE pid=%lu, pFile=%p, rc=SQLITE_OK\n", + osGetCurrentProcessId(), pFd)); + return SQLITE_OK; +} +#endif /* SQLITE_MAX_MMAP_SIZE>0 */ + +/* +** If possible, return a pointer to a mapping of file fd starting at offset +** iOff. The mapping must be valid for at least nAmt bytes. +** +** If such a pointer can be obtained, store it in *pp and return SQLITE_OK. +** Or, if one cannot but no error occurs, set *pp to 0 and return SQLITE_OK. +** Finally, if an error does occur, return an SQLite error code. The final +** value of *pp is undefined in this case. +** +** If this function does return a pointer, the caller must eventually +** release the reference by calling winUnfetch(). 
+*/ +static int winFetch(sqlite3_file *fd, i64 iOff, int nAmt, void **pp){ +#if SQLITE_MAX_MMAP_SIZE>0 + winFile *pFd = (winFile*)fd; /* The underlying database file */ +#endif + *pp = 0; + + OSTRACE(("FETCH pid=%lu, pFile=%p, offset=%lld, amount=%d, pp=%p\n", + osGetCurrentProcessId(), fd, iOff, nAmt, pp)); + +#if SQLITE_MAX_MMAP_SIZE>0 + if( pFd->mmapSizeMax>0 ){ + if( pFd->pMapRegion==0 ){ + int rc = winMapfile(pFd, -1); + if( rc!=SQLITE_OK ){ + OSTRACE(("FETCH pid=%lu, pFile=%p, rc=%s\n", + osGetCurrentProcessId(), pFd, sqlite3ErrName(rc))); + return rc; + } + } + if( pFd->mmapSize >= iOff+nAmt ){ + *pp = &((u8 *)pFd->pMapRegion)[iOff]; + pFd->nFetchOut++; + } + } +#endif + + OSTRACE(("FETCH pid=%lu, pFile=%p, pp=%p, *pp=%p, rc=SQLITE_OK\n", + osGetCurrentProcessId(), fd, pp, *pp)); + return SQLITE_OK; +} + +/* +** If the third argument is non-NULL, then this function releases a +** reference obtained by an earlier call to winFetch(). The second +** argument passed to this function must be the same as the corresponding +** argument that was passed to the winFetch() invocation. +** +** Or, if the third argument is NULL, then this function is being called +** to inform the VFS layer that, according to POSIX, any existing mapping +** may now be invalid and should be unmapped. +*/ +static int winUnfetch(sqlite3_file *fd, i64 iOff, void *p){ +#if SQLITE_MAX_MMAP_SIZE>0 + winFile *pFd = (winFile*)fd; /* The underlying database file */ + + /* If p==0 (unmap the entire file) then there must be no outstanding + ** xFetch references. Or, if p!=0 (meaning it is an xFetch reference), + ** then there must be at least one outstanding. */ + assert( (p==0)==(pFd->nFetchOut==0) ); + + /* If p!=0, it must match the iOff value. */ + assert( p==0 || p==&((u8 *)pFd->pMapRegion)[iOff] ); + + OSTRACE(("UNFETCH pid=%lu, pFile=%p, offset=%lld, p=%p\n", + osGetCurrentProcessId(), pFd, iOff, p)); + + if( p ){ + pFd->nFetchOut--; + }else{ + /* FIXME: If Windows truly always prevents truncating or deleting a + ** file while a mapping is held, then the following winUnmapfile() call + ** is unnecessary can can be omitted - potentially improving + ** performance. */ + winUnmapfile(pFd); + } + + assert( pFd->nFetchOut>=0 ); +#endif + + OSTRACE(("UNFETCH pid=%lu, pFile=%p, rc=SQLITE_OK\n", + osGetCurrentProcessId(), fd)); + return SQLITE_OK; +} + /* ** Here ends the implementation of all sqlite3_file methods. ** @@ -33619,7 +34532,7 @@ shmpage_out: ** sqlite3_file for win32. 
*/ static const sqlite3_io_methods winIoMethod = { - 2, /* iVersion */ + 3, /* iVersion */ winClose, /* xClose */ winRead, /* xRead */ winWrite, /* xWrite */ @@ -33635,7 +34548,9 @@ static const sqlite3_io_methods winIoMethod = { winShmMap, /* xShmMap */ winShmLock, /* xShmLock */ winShmBarrier, /* xShmBarrier */ - winShmUnmap /* xShmUnmap */ + winShmUnmap, /* xShmUnmap */ + winFetch, /* xFetch */ + winUnfetch /* xUnfetch */ }; /**************************************************************************** @@ -33699,6 +34614,7 @@ static int getTempname(int nBuf, char *zBuf){ sqlite3_snprintf(MAX_PATH-30, zTempPath, "%s", zMulti); sqlite3_free(zMulti); }else{ + OSTRACE(("TEMP-FILENAME rc=SQLITE_IOERR_NOMEM\n")); return SQLITE_IOERR_NOMEM; } } @@ -33712,6 +34628,7 @@ static int getTempname(int nBuf, char *zBuf){ sqlite3_snprintf(MAX_PATH-30, zTempPath, "%s", zUtf8); sqlite3_free(zUtf8); }else{ + OSTRACE(("TEMP-FILENAME rc=SQLITE_IOERR_NOMEM\n")); return SQLITE_IOERR_NOMEM; } } @@ -33724,6 +34641,7 @@ static int getTempname(int nBuf, char *zBuf){ nTempPath = sqlite3Strlen30(zTempPath); if( (nTempPath + sqlite3Strlen30(SQLITE_TEMP_FILE_PREFIX) + 18) >= nBuf ){ + OSTRACE(("TEMP-FILENAME rc=SQLITE_ERROR\n")); return SQLITE_ERROR; } @@ -33741,8 +34659,8 @@ static int getTempname(int nBuf, char *zBuf){ zBuf[j] = 0; zBuf[j+1] = 0; - OSTRACE(("TEMP FILENAME: %s\n", zBuf)); - return SQLITE_OK; + OSTRACE(("TEMP-FILENAME name=%s, rc=SQLITE_OK\n", zBuf)); + return SQLITE_OK; } /* @@ -33811,9 +34729,7 @@ static int winOpen( int isExclusive = (flags & SQLITE_OPEN_EXCLUSIVE); int isDelete = (flags & SQLITE_OPEN_DELETEONCLOSE); int isCreate = (flags & SQLITE_OPEN_CREATE); -#ifndef NDEBUG int isReadonly = (flags & SQLITE_OPEN_READONLY); -#endif int isReadWrite = (flags & SQLITE_OPEN_READWRITE); #ifndef NDEBUG @@ -33824,6 +34740,9 @@ static int winOpen( )); #endif + OSTRACE(("OPEN name=%s, pFile=%p, flags=%x, pOutFlags=%p\n", + zUtf8Name, id, flags, pOutFlags)); + /* Check the following statements are true: ** ** (a) Exactly one of the READWRITE and READONLY flags must be set, and @@ -33869,6 +34788,7 @@ static int winOpen( memset(zTmpname, 0, MAX_PATH+2); rc = getTempname(MAX_PATH+2, zTmpname); if( rc!=SQLITE_OK ){ + OSTRACE(("OPEN name=%s, rc=%s", zUtf8Name, sqlite3ErrName(rc))); return rc; } zUtf8Name = zTmpname; @@ -33884,11 +34804,13 @@ static int winOpen( /* Convert the filename to the system encoding. */ zConverted = convertUtf8Filename(zUtf8Name); if( zConverted==0 ){ + OSTRACE(("OPEN name=%s, rc=SQLITE_IOERR_NOMEM", zUtf8Name)); return SQLITE_IOERR_NOMEM; } if( winIsDir(zConverted) ){ sqlite3_free(zConverted); + OSTRACE(("OPEN name=%s, rc=SQLITE_CANTOPEN_ISDIR", zUtf8Name)); return SQLITE_CANTOPEN_ISDIR; } @@ -33979,9 +34901,8 @@ static int winOpen( #endif logIoerr(cnt); - OSTRACE(("OPEN %d %s 0x%lx %s\n", - h, zName, dwDesiredAccess, - h==INVALID_HANDLE_VALUE ? "failed" : "ok")); + OSTRACE(("OPEN file=%p, name=%s, access=%lx, rc=%s\n", h, zUtf8Name, + dwDesiredAccess, (h==INVALID_HANDLE_VALUE) ? "failed" : "ok")); if( h==INVALID_HANDLE_VALUE ){ pFile->lastErrno = lastErrno; @@ -34005,12 +34926,17 @@ static int winOpen( } } + OSTRACE(("OPEN file=%p, name=%s, access=%lx, pOutFlags=%p, *pOutFlags=%d, " + "rc=%s\n", h, zUtf8Name, dwDesiredAccess, pOutFlags, pOutFlags ? + *pOutFlags : 0, (h==INVALID_HANDLE_VALUE) ? 
"failed" : "ok")); + #if SQLITE_OS_WINCE if( isReadWrite && eType==SQLITE_OPEN_MAIN_DB && (rc = winceCreateLock(zName, pFile))!=SQLITE_OK ){ osCloseHandle(h); sqlite3_free(zConverted); + OSTRACE(("OPEN-CE-LOCK name=%s, rc=%s\n", zName, sqlite3ErrName(rc))); return rc; } if( isTemp ){ @@ -34024,11 +34950,21 @@ static int winOpen( pFile->pMethod = &winIoMethod; pFile->pVfs = pVfs; pFile->h = h; + if( isReadonly ){ + pFile->ctrlFlags |= WINFILE_RDONLY; + } if( sqlite3_uri_boolean(zName, "psow", SQLITE_POWERSAFE_OVERWRITE) ){ pFile->ctrlFlags |= WINFILE_PSOW; } pFile->lastErrno = NO_ERROR; pFile->zPath = zName; +#if SQLITE_MAX_MMAP_SIZE>0 + pFile->hMap = NULL; + pFile->pMapRegion = 0; + pFile->mmapSize = 0; + pFile->mmapSizeActual = 0; + pFile->mmapSizeMax = sqlite3GlobalConfig.szMmap; +#endif OpenCounter(+1); return rc; @@ -34060,6 +34996,8 @@ static int winDelete( UNUSED_PARAMETER(syncDir); SimulateIOError(return SQLITE_IOERR_DELETE); + OSTRACE(("DELETE name=%s, syncDir=%d\n", zFilename, syncDir)); + zConverted = convertUtf8Filename(zFilename); if( zConverted==0 ){ return SQLITE_IOERR_NOMEM; @@ -34145,7 +35083,7 @@ static int winDelete( logIoerr(cnt); } sqlite3_free(zConverted); - OSTRACE(("DELETE \"%s\" %s\n", zFilename, (rc ? "failed" : "ok" ))); + OSTRACE(("DELETE name=%s, rc=%s\n", zFilename, sqlite3ErrName(rc))); return rc; } @@ -34165,8 +35103,12 @@ static int winAccess( UNUSED_PARAMETER(pVfs); SimulateIOError( return SQLITE_IOERR_ACCESS; ); + OSTRACE(("ACCESS name=%s, flags=%x, pResOut=%p\n", + zFilename, flags, pResOut)); + zConverted = convertUtf8Filename(zFilename); if( zConverted==0 ){ + OSTRACE(("ACCESS name=%s, rc=SQLITE_IOERR_NOMEM\n", zFilename)); return SQLITE_IOERR_NOMEM; } if( isNT() ){ @@ -34217,6 +35159,8 @@ static int winAccess( assert(!"Invalid flags argument"); } *pResOut = rc; + OSTRACE(("ACCESS name=%s, pResOut=%p, *pResOut=%d, rc=SQLITE_OK\n", + zFilename, pResOut, *pResOut)); return SQLITE_OK; } @@ -34657,7 +35601,6 @@ SQLITE_API int sqlite3_os_init(void){ ** correctly. See ticket [bb3a86e890c8e96ab] */ assert( ArraySize(aSyscall)==74 ); -#ifndef SQLITE_OMIT_WAL /* get memory map allocation granularity */ memset(&winSysInfo, 0, sizeof(SYSTEM_INFO)); #if SQLITE_OS_WINRT @@ -34665,8 +35608,8 @@ SQLITE_API int sqlite3_os_init(void){ #else osGetSystemInfo(&winSysInfo); #endif - assert(winSysInfo.dwAllocationGranularity > 0); -#endif + assert( winSysInfo.dwAllocationGranularity>0 ); + assert( winSysInfo.dwPageSize>0 ); sqlite3_vfs_register(&winVfs, 1); return SQLITE_OK; @@ -37303,7 +38246,6 @@ SQLITE_PRIVATE int sqlite3RowSetTest(RowSet *pRowSet, u8 iBatch, sqlite3_int64 i # define sqlite3WalClose(w,x,y,z) 0 # define sqlite3WalBeginReadTransaction(y,z) 0 # define sqlite3WalEndReadTransaction(z) -# define sqlite3WalRead(v,w,x,y,z) 0 # define sqlite3WalDbsize(y) 0 # define sqlite3WalBeginWriteTransaction(y) 0 # define sqlite3WalEndWriteTransaction(x) 0 @@ -37316,6 +38258,7 @@ SQLITE_PRIVATE int sqlite3RowSetTest(RowSet *pRowSet, u8 iBatch, sqlite3_int64 i # define sqlite3WalExclusiveMode(y,z) 0 # define sqlite3WalHeapMemory(z) 0 # define sqlite3WalFramesize(z) 0 +# define sqlite3WalFindFrame(x,y,z) 0 #else #define WAL_SAVEPOINT_NDATA 4 @@ -37343,7 +38286,8 @@ SQLITE_PRIVATE int sqlite3WalBeginReadTransaction(Wal *pWal, int *); SQLITE_PRIVATE void sqlite3WalEndReadTransaction(Wal *pWal); /* Read a page from the write-ahead log, if it is present. 
*/ -SQLITE_PRIVATE int sqlite3WalRead(Wal *pWal, Pgno pgno, int *pInWal, int nOut, u8 *pOut); +SQLITE_PRIVATE int sqlite3WalFindFrame(Wal *, Pgno, u32 *); +SQLITE_PRIVATE int sqlite3WalReadFrame(Wal *, u32, int, u8 *); /* If the WAL is not empty, return the size of the database. */ SQLITE_PRIVATE Pgno sqlite3WalDbsize(Wal *pWal); @@ -38043,6 +38987,11 @@ struct Pager { PagerSavepoint *aSavepoint; /* Array of active savepoints */ int nSavepoint; /* Number of elements in aSavepoint[] */ char dbFileVers[16]; /* Changes whenever database file changes */ + + u8 bUseFetch; /* True to use xFetch() */ + int nMmapOut; /* Number of mmap pages currently outstanding */ + sqlite3_int64 szMmap; /* Desired maximum mmap size */ + PgHdr *pMmapFreelist; /* List of free mmap page headers (pDirty) */ /* ** End of the routinely-changing class members ***************************************************************************/ @@ -38153,6 +39102,16 @@ static const unsigned char aJournalMagic[] = { # define MEMDB pPager->memDb #endif +/* +** The macro USEFETCH is true if we are allowed to use the xFetch and xUnfetch +** interfaces to access the database using memory-mapped I/O. +*/ +#if SQLITE_MAX_MMAP_SIZE>0 +# define USEFETCH(x) ((x)->bUseFetch) +#else +# define USEFETCH(x) 0 +#endif + /* ** The maximum legal page number is (2^31 - 1). */ @@ -39640,7 +40599,7 @@ static int pager_playback_one_page( i64 ofst = (pgno-1)*(i64)pPager->pageSize; testcase( !isSavepnt && pPg!=0 && (pPg->flags&PGHDR_NEED_SYNC)!=0 ); assert( !pagerUseWal(pPager) ); - rc = sqlite3OsWrite(pPager->fd, (u8*)aData, pPager->pageSize, ofst); + rc = sqlite3OsWrite(pPager->fd, (u8 *)aData, pPager->pageSize, ofst); if( pgno>pPager->dbFileSize ){ pPager->dbFileSize = pgno; } @@ -40031,6 +40990,7 @@ static int pager_playback(Pager *pPager, int isHot){ int res = 1; /* Value returned by sqlite3OsAccess() */ char *zMaster = 0; /* Name of master journal file if any */ int needPagerReset; /* True to reset page prior to first page rollback */ + int nPlayback = 0; /* Total number of pages restored from journal */ /* Figure out how many records are in the journal. Abort early if ** the journal is empty. @@ -40131,7 +41091,9 @@ static int pager_playback(Pager *pPager, int isHot){ needPagerReset = 0; } rc = pager_playback_one_page(pPager,&pPager->journalOff,0,1,0); - if( rc!=SQLITE_OK ){ + if( rc==SQLITE_OK ){ + nPlayback++; + }else{ if( rc==SQLITE_DONE ){ pPager->journalOff = szJ; break; @@ -40201,6 +41163,10 @@ end_playback: rc = pager_delmaster(pPager, zMaster); testcase( rc!=SQLITE_OK ); } + if( isHot && nPlayback ){ + sqlite3_log(SQLITE_NOTICE_RECOVER_ROLLBACK, "recovered %d pages from %s", + nPlayback, pPager->zJournal); + } /* The Pager.sectorSize variable may have been updated while rolling ** back a journal created by a process with a different sector size @@ -40222,11 +41188,10 @@ end_playback: ** If an IO error occurs, then the IO error is returned to the caller. ** Otherwise, SQLITE_OK is returned. 
*/ -static int readDbPage(PgHdr *pPg){ +static int readDbPage(PgHdr *pPg, u32 iFrame){ Pager *pPager = pPg->pPager; /* Pager object associated with page pPg */ Pgno pgno = pPg->pgno; /* Page number to read */ int rc = SQLITE_OK; /* Return code */ - int isInWal = 0; /* True if page is in log file */ int pgsz = pPager->pageSize; /* Number of bytes to read */ assert( pPager->eState>=PAGER_READER && !MEMDB ); @@ -40238,11 +41203,13 @@ static int readDbPage(PgHdr *pPg){ return SQLITE_OK; } - if( pagerUseWal(pPager) ){ +#ifndef SQLITE_OMIT_WAL + if( iFrame ){ /* Try to pull the page from the write-ahead log. */ - rc = sqlite3WalRead(pPager->pWal, pgno, &isInWal, pgsz, pPg->pData); - } - if( rc==SQLITE_OK && !isInWal ){ + rc = sqlite3WalReadFrame(pPager->pWal, iFrame, pgsz, pPg->pData); + }else +#endif + { i64 iOffset = (pgno-1)*(i64)pPager->pageSize; rc = sqlite3OsRead(pPager->fd, pPg->pData, pgsz, iOffset); if( rc==SQLITE_IOERR_SHORT_READ ){ @@ -40321,12 +41288,17 @@ static int pagerUndoCallback(void *pCtx, Pgno iPg){ Pager *pPager = (Pager *)pCtx; PgHdr *pPg; + assert( pagerUseWal(pPager) ); pPg = sqlite3PagerLookup(pPager, iPg); if( pPg ){ if( sqlite3PcachePageRefcount(pPg)==1 ){ sqlite3PcacheDrop(pPg); }else{ - rc = readDbPage(pPg); + u32 iFrame = 0; + rc = sqlite3WalFindFrame(pPager->pWal, pPg->pgno, &iFrame); + if( rc==SQLITE_OK ){ + rc = readDbPage(pPg, iFrame); + } if( rc==SQLITE_OK ){ pPager->xReiniter(pPg); } @@ -40470,6 +41442,7 @@ static int pagerBeginReadTransaction(Pager *pPager){ rc = sqlite3WalBeginReadTransaction(pPager->pWal, &changed); if( rc!=SQLITE_OK || changed ){ pager_reset(pPager); + if( USEFETCH(pPager) ) sqlite3OsUnfetch(pPager->fd, 0, 0); } return rc; @@ -40731,6 +41704,29 @@ SQLITE_PRIVATE void sqlite3PagerSetCachesize(Pager *pPager, int mxPage){ sqlite3PcacheSetCachesize(pPager->pPCache, mxPage); } +/* +** Invoke SQLITE_FCNTL_MMAP_SIZE based on the current value of szMmap. +*/ +static void pagerFixMaplimit(Pager *pPager){ +#if SQLITE_MAX_MMAP_SIZE>0 + sqlite3_file *fd = pPager->fd; + if( isOpen(fd) ){ + sqlite3_int64 sz; + pPager->bUseFetch = (fd->pMethods->iVersion>=3) && pPager->szMmap>0; + sz = pPager->szMmap; + sqlite3OsFileControlHint(pPager->fd, SQLITE_FCNTL_MMAP_SIZE, &sz); + } +#endif +} + +/* +** Change the maximum size of any memory mapping made of the database file. +*/ +SQLITE_PRIVATE void sqlite3PagerSetMmapLimit(Pager *pPager, sqlite3_int64 szMmap){ + pPager->szMmap = szMmap; + pagerFixMaplimit(pPager); +} + /* ** Free as much memory as possible from the pager. */ @@ -40966,6 +41962,7 @@ SQLITE_PRIVATE int sqlite3PagerSetPagesize(Pager *pPager, u32 *pPageSize, int nR assert( nReserve>=0 && nReserve<1000 ); pPager->nReserve = (i16)nReserve; pagerReportSize(pPager); + pagerFixMaplimit(pPager); } return rc; } @@ -41191,6 +42188,81 @@ static int pagerSyncHotJournal(Pager *pPager){ return rc; } +/* +** Obtain a reference to a memory mapped page object for page number pgno. +** The new object will use the pointer pData, obtained from xFetch(). +** If successful, set *ppPage to point to the new page reference +** and return SQLITE_OK. Otherwise, return an SQLite error code and set +** *ppPage to zero. +** +** Page references obtained by calling this function should be released +** by calling pagerReleaseMapPage(). 
+*/ +static int pagerAcquireMapPage( + Pager *pPager, /* Pager object */ + Pgno pgno, /* Page number */ + void *pData, /* xFetch()'d data for this page */ + PgHdr **ppPage /* OUT: Acquired page object */ +){ + PgHdr *p; /* Memory mapped page to return */ + + if( pPager->pMmapFreelist ){ + *ppPage = p = pPager->pMmapFreelist; + pPager->pMmapFreelist = p->pDirty; + p->pDirty = 0; + memset(p->pExtra, 0, pPager->nExtra); + }else{ + *ppPage = p = (PgHdr *)sqlite3MallocZero(sizeof(PgHdr) + pPager->nExtra); + if( p==0 ){ + sqlite3OsUnfetch(pPager->fd, (i64)(pgno-1) * pPager->pageSize, pData); + return SQLITE_NOMEM; + } + p->pExtra = (void *)&p[1]; + p->flags = PGHDR_MMAP; + p->nRef = 1; + p->pPager = pPager; + } + + assert( p->pExtra==(void *)&p[1] ); + assert( p->pPage==0 ); + assert( p->flags==PGHDR_MMAP ); + assert( p->pPager==pPager ); + assert( p->nRef==1 ); + + p->pgno = pgno; + p->pData = pData; + pPager->nMmapOut++; + + return SQLITE_OK; +} + +/* +** Release a reference to page pPg. pPg must have been returned by an +** earlier call to pagerAcquireMapPage(). +*/ +static void pagerReleaseMapPage(PgHdr *pPg){ + Pager *pPager = pPg->pPager; + pPager->nMmapOut--; + pPg->pDirty = pPager->pMmapFreelist; + pPager->pMmapFreelist = pPg; + + assert( pPager->fd->pMethods->iVersion>=3 ); + sqlite3OsUnfetch(pPager->fd, (i64)(pPg->pgno-1)*pPager->pageSize, pPg->pData); +} + +/* +** Free all PgHdr objects stored in the Pager.pMmapFreelist list. +*/ +static void pagerFreeMapHdrs(Pager *pPager){ + PgHdr *p; + PgHdr *pNext; + for(p=pPager->pMmapFreelist; p; p=pNext){ + pNext = p->pDirty; + sqlite3_free(p); + } +} + + /* ** Shutdown the page cache. Free all memory and close all files. ** @@ -41211,6 +42283,7 @@ SQLITE_PRIVATE int sqlite3PagerClose(Pager *pPager){ assert( assert_pager_state(pPager) ); disable_simulated_io_errors(); sqlite3BeginBenignMalloc(); + pagerFreeMapHdrs(pPager); /* pPager->errCode = 0; */ pPager->exclusiveMode = 0; #ifndef SQLITE_OMIT_WAL @@ -41472,7 +42545,9 @@ static int pager_write_pagelist(Pager *pPager, PgHdr *pList){ ** file size will be. */ assert( rc!=SQLITE_OK || isOpen(pPager->fd) ); - if( rc==SQLITE_OK && pPager->dbSize>pPager->dbHintSize ){ + if( rc==SQLITE_OK + && (pList->pDirty ? pPager->dbSize : pList->pgno+1)>pPager->dbHintSize + ){ sqlite3_int64 szFile = pPager->pageSize * (sqlite3_int64)pPager->dbSize; sqlite3OsFileControlHint(pPager->fd, SQLITE_FCNTL_SIZE_HINT, &szFile); pPager->dbHintSize = pPager->dbSize; @@ -42026,6 +43101,7 @@ SQLITE_PRIVATE int sqlite3PagerOpen( /* pPager->pBusyHandlerArg = 0; */ pPager->xReiniter = xReinit; /* memset(pPager->aHash, 0, sizeof(pPager->aHash)); */ + /* pPager->szMmap = SQLITE_DEFAULT_MMAP_SIZE // will be set by btree.c */ *ppPager = pPager; return SQLITE_OK; @@ -42317,9 +43393,11 @@ SQLITE_PRIVATE int sqlite3PagerSharedLock(Pager *pPager){ ); } - if( !pPager->tempFile - && (pPager->pBackup || sqlite3PcachePagecount(pPager->pPCache)>0) - ){ + if( !pPager->tempFile && ( + pPager->pBackup + || sqlite3PcachePagecount(pPager->pPCache)>0 + || USEFETCH(pPager) + )){ /* The shared-lock has just been acquired on the database file ** and there are already pages in the cache (from a previous ** read or write transaction). 
Check to see if the database @@ -42345,7 +43423,7 @@ SQLITE_PRIVATE int sqlite3PagerSharedLock(Pager *pPager){ if( nPage>0 ){ IOTRACE(("CKVERS %p %d\n", pPager, sizeof(dbFileVers))); rc = sqlite3OsRead(pPager->fd, &dbFileVers, sizeof(dbFileVers), 24); - if( rc!=SQLITE_OK ){ + if( rc!=SQLITE_OK && rc!=SQLITE_IOERR_SHORT_READ ){ goto failed; } }else{ @@ -42354,6 +43432,16 @@ SQLITE_PRIVATE int sqlite3PagerSharedLock(Pager *pPager){ if( memcmp(pPager->dbFileVers, dbFileVers, sizeof(dbFileVers))!=0 ){ pager_reset(pPager); + + /* Unmap the database file. It is possible that external processes + ** may have truncated the database file and then extended it back + ** to its original size while this process was not holding a lock. + ** In this case there may exist a Pager.pMap mapping that appears + ** to be the right size but is not actually valid. Avoid this + ** possibility by unmapping the db here. */ + if( USEFETCH(pPager) ){ + sqlite3OsUnfetch(pPager->fd, 0, 0); + } } } @@ -42395,7 +43483,7 @@ SQLITE_PRIVATE int sqlite3PagerSharedLock(Pager *pPager){ ** nothing to rollback, so this routine is a no-op. */ static void pagerUnlockIfUnused(Pager *pPager){ - if( (sqlite3PcacheRefCount(pPager->pPCache)==0) ){ + if( pPager->nMmapOut==0 && (sqlite3PcacheRefCount(pPager->pPCache)==0) ){ pagerUnlockAndRollback(pPager); } } @@ -42454,13 +43542,27 @@ SQLITE_PRIVATE int sqlite3PagerAcquire( Pager *pPager, /* The pager open on the database file */ Pgno pgno, /* Page number to fetch */ DbPage **ppPage, /* Write a pointer to the page here */ - int noContent /* Do not bother reading content from disk if true */ + int flags /* PAGER_ACQUIRE_XXX flags */ ){ - int rc; - PgHdr *pPg; + int rc = SQLITE_OK; + PgHdr *pPg = 0; + u32 iFrame = 0; /* Frame to read from WAL file */ + const int noContent = (flags & PAGER_ACQUIRE_NOCONTENT); + + /* It is acceptable to use a read-only (mmap) page for any page except + ** page 1 if there is no write-transaction open or the ACQUIRE_READONLY + ** flag was specified by the caller. And so long as the db is not a + ** temporary or in-memory database. 
*/ + const int bMmapOk = (pgno!=1 && USEFETCH(pPager) + && (pPager->eState==PAGER_READER || (flags & PAGER_ACQUIRE_READONLY)) +#ifdef SQLITE_HAS_CODEC + && pPager->xCodec==0 +#endif + ); assert( pPager->eState>=PAGER_READER ); assert( assert_pager_state(pPager) ); + assert( noContent==0 || bMmapOk==0 ); if( pgno==0 ){ return SQLITE_CORRUPT_BKPT; @@ -42471,6 +43573,39 @@ SQLITE_PRIVATE int sqlite3PagerAcquire( if( pPager->errCode!=SQLITE_OK ){ rc = pPager->errCode; }else{ + + if( bMmapOk && pagerUseWal(pPager) ){ + rc = sqlite3WalFindFrame(pPager->pWal, pgno, &iFrame); + if( rc!=SQLITE_OK ) goto pager_acquire_err; + } + + if( iFrame==0 && bMmapOk ){ + void *pData = 0; + + rc = sqlite3OsFetch(pPager->fd, + (i64)(pgno-1) * pPager->pageSize, pPager->pageSize, &pData + ); + + if( rc==SQLITE_OK && pData ){ + if( pPager->eState>PAGER_READER ){ + (void)sqlite3PcacheFetch(pPager->pPCache, pgno, 0, &pPg); + } + if( pPg==0 ){ + rc = pagerAcquireMapPage(pPager, pgno, pData, &pPg); + }else{ + sqlite3OsUnfetch(pPager->fd, (i64)(pgno-1)*pPager->pageSize, pData); + } + if( pPg ){ + assert( rc==SQLITE_OK ); + *ppPage = pPg; + return SQLITE_OK; + } + } + if( rc!=SQLITE_OK ){ + goto pager_acquire_err; + } + } + rc = sqlite3PcacheFetch(pPager->pPCache, pgno, 1, ppPage); } @@ -42529,9 +43664,13 @@ SQLITE_PRIVATE int sqlite3PagerAcquire( memset(pPg->pData, 0, pPager->pageSize); IOTRACE(("ZERO %p %d\n", pPager, pgno)); }else{ + if( pagerUseWal(pPager) && bMmapOk==0 ){ + rc = sqlite3WalFindFrame(pPager->pWal, pgno, &iFrame); + if( rc!=SQLITE_OK ) goto pager_acquire_err; + } assert( pPg->pPager==pPager ); pPager->aStat[PAGER_STAT_MISS]++; - rc = readDbPage(pPg); + rc = readDbPage(pPg, iFrame); if( rc!=SQLITE_OK ){ goto pager_acquire_err; } @@ -42584,7 +43723,11 @@ SQLITE_PRIVATE DbPage *sqlite3PagerLookup(Pager *pPager, Pgno pgno){ SQLITE_PRIVATE void sqlite3PagerUnref(DbPage *pPg){ if( pPg ){ Pager *pPager = pPg->pPager; - sqlite3PcacheRelease(pPg); + if( pPg->flags & PGHDR_MMAP ){ + pagerReleaseMapPage(pPg); + }else{ + sqlite3PcacheRelease(pPg); + } pagerUnlockIfUnused(pPager); } } @@ -42919,6 +44062,7 @@ SQLITE_PRIVATE int sqlite3PagerWrite(DbPage *pDbPage){ Pager *pPager = pPg->pPager; Pgno nPagePerSector = (pPager->sectorSize/pPager->pageSize); + assert( (pPg->flags & PGHDR_MMAP)==0 ); assert( pPager->eState>=PAGER_WRITER_LOCKED ); assert( pPager->eState!=PAGER_ERROR ); assert( assert_pager_state(pPager) ); @@ -43118,6 +44262,11 @@ static int pager_incr_changecounter(Pager *pPager, int isDirectMode){ pPager->aStat[PAGER_STAT_WRITE]++; } if( rc==SQLITE_OK ){ + /* Update the pager's copy of the change-counter. Otherwise, the + ** next time a read transaction is opened the cache will be + ** flushed (as the change-counter values will not match). */ + const void *pCopy = (const void *)&((const char *)zBuf)[24]; + memcpy(&pPager->dbFileVers, pCopy, sizeof(pPager->dbFileVers)); pPager->changeCountDone = 1; } }else{ @@ -43475,7 +44624,7 @@ SQLITE_PRIVATE int sqlite3PagerRollback(Pager *pPager){ } assert( pPager->eState==PAGER_READER || rc!=SQLITE_OK ); - assert( rc==SQLITE_OK || rc==SQLITE_FULL + assert( rc==SQLITE_OK || rc==SQLITE_FULL || rc==SQLITE_CORRUPT || rc==SQLITE_NOMEM || (rc&0xFF)==SQLITE_IOERR ); /* If an error occurs during a ROLLBACK, we can no longer trust the pager @@ -44209,11 +45358,12 @@ static int pagerOpenWal(Pager *pPager){ ** (e.g. due to malloc() failure), return an error code. 
*/ if( rc==SQLITE_OK ){ - rc = sqlite3WalOpen(pPager->pVfs, + rc = sqlite3WalOpen(pPager->pVfs, pPager->fd, pPager->zWal, pPager->exclusiveMode, pPager->journalSizeLimit, &pPager->pWal ); } + pagerFixMaplimit(pPager); return rc; } @@ -44304,6 +45454,7 @@ SQLITE_PRIVATE int sqlite3PagerCloseWal(Pager *pPager){ rc = sqlite3WalClose(pPager->pWal, pPager->ckptSyncFlags, pPager->pageSize, (u8*)pPager->pTmpSpace); pPager->pWal = 0; + pagerFixMaplimit(pPager); } } return rc; @@ -45552,8 +46703,9 @@ finished: ** checkpointing the log file. */ if( pWal->hdr.nPage ){ - sqlite3_log(SQLITE_OK, "Recovered %d frames from WAL file %s", - pWal->hdr.nPage, pWal->zWalName + sqlite3_log(SQLITE_NOTICE_RECOVER_WAL, + "recovered %d frames from WAL file %s", + pWal->hdr.mxFrame, pWal->zWalName ); } } @@ -46067,8 +47219,8 @@ static int walCheckpoint( rc = sqlite3OsSync(pWal->pWalFd, sync_flags); } - /* If the database file may grow as a result of this checkpoint, hint - ** about the eventual size of the db file to the VFS layer. + /* If the database may grow as a result of this checkpoint, hint + ** about the eventual size of the db file to the VFS layer. */ if( rc==SQLITE_OK ){ i64 nReq = ((i64)mxPage * szPage); @@ -46078,6 +47230,7 @@ static int walCheckpoint( } } + /* Iterate through the contents of the WAL, copying data to the db file. */ while( rc==SQLITE_OK && 0==walIteratorNext(pIter, &iDbpage, &iFrame) ){ i64 iOffset; @@ -46632,19 +47785,17 @@ SQLITE_PRIVATE void sqlite3WalEndReadTransaction(Wal *pWal){ } /* -** Read a page from the WAL, if it is present in the WAL and if the -** current read transaction is configured to use the WAL. +** Search the wal file for page pgno. If found, set *piRead to the frame that +** contains the page. Otherwise, if pgno is not in the wal file, set *piRead +** to zero. ** -** The *pInWal is set to 1 if the requested page is in the WAL and -** has been loaded. Or *pInWal is set to 0 if the page was not in -** the WAL and needs to be read out of the database. +** Return SQLITE_OK if successful, or an error code if an error occurs. If an +** error does occur, the final value of *piRead is undefined. */ -SQLITE_PRIVATE int sqlite3WalRead( +SQLITE_PRIVATE int sqlite3WalFindFrame( Wal *pWal, /* WAL handle */ Pgno pgno, /* Database page number to read data for */ - int *pInWal, /* OUT: True if data is read from WAL */ - int nOut, /* Size of buffer pOut in bytes */ - u8 *pOut /* Buffer to write page data to */ + u32 *piRead /* OUT: Frame number (or zero) */ ){ u32 iRead = 0; /* If !=0, WAL frame to return data from */ u32 iLast = pWal->hdr.mxFrame; /* Last page in WAL for this reader */ @@ -46660,7 +47811,7 @@ SQLITE_PRIVATE int sqlite3WalRead( ** WAL were empty. */ if( iLast==0 || pWal->readLock==0 ){ - *pInWal = 0; + *piRead = 0; return SQLITE_OK; } @@ -46731,26 +47882,31 @@ SQLITE_PRIVATE int sqlite3WalRead( } #endif - /* If iRead is non-zero, then it is the log frame number that contains the - ** required page. Read and return data from the log file. - */ - if( iRead ){ - int sz; - i64 iOffset; - sz = pWal->hdr.szPage; - sz = (sz&0xfe00) + ((sz&0x0001)<<16); - testcase( sz<=32768 ); - testcase( sz>=65536 ); - iOffset = walFrameOffset(iRead, sz) + WAL_FRAME_HDRSIZE; - *pInWal = 1; - /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL */ - return sqlite3OsRead(pWal->pWalFd, pOut, (nOut>sz ? 
sz : nOut), iOffset); - } - - *pInWal = 0; + *piRead = iRead; return SQLITE_OK; } +/* +** Read the contents of frame iRead from the wal file into buffer pOut +** (which is nOut bytes in size). Return SQLITE_OK if successful, or an +** error code otherwise. +*/ +SQLITE_PRIVATE int sqlite3WalReadFrame( + Wal *pWal, /* WAL handle */ + u32 iRead, /* Frame to read */ + int nOut, /* Size of buffer pOut in bytes */ + u8 *pOut /* Buffer to write page data to */ +){ + int sz; + i64 iOffset; + sz = pWal->hdr.szPage; + sz = (sz&0xfe00) + ((sz&0x0001)<<16); + testcase( sz<=32768 ); + testcase( sz>=65536 ); + iOffset = walFrameOffset(iRead, sz) + WAL_FRAME_HDRSIZE; + /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL */ + return sqlite3OsRead(pWal->pWalFd, pOut, (nOut>sz ? sz : nOut), iOffset); +} /* ** Return the size of the database in pages (or zero, if unknown). @@ -47297,6 +48453,9 @@ SQLITE_PRIVATE int sqlite3WalCheckpoint( /* Read the wal-index header. */ if( rc==SQLITE_OK ){ rc = walIndexReadHdr(pWal, &isChanged); + if( isChanged && pWal->pDbFd->pMethods->iVersion>=3 ){ + sqlite3OsUnfetch(pWal->pDbFd, 0, 0); + } } /* Copy data from the log to the database file. */ @@ -49968,13 +51127,17 @@ static int btreeGetPage( BtShared *pBt, /* The btree */ Pgno pgno, /* Number of the page to fetch */ MemPage **ppPage, /* Return the page in this parameter */ - int noContent /* Do not load page content if true */ + int noContent, /* Do not load page content if true */ + int bReadonly /* True if a read-only (mmap) page is ok */ ){ int rc; DbPage *pDbPage; + int flags = (noContent ? PAGER_ACQUIRE_NOCONTENT : 0) + | (bReadonly ? PAGER_ACQUIRE_READONLY : 0); + assert( noContent==0 || bReadonly==0 ); assert( sqlite3_mutex_held(pBt->mutex) ); - rc = sqlite3PagerAcquire(pBt->pPager, pgno, (DbPage**)&pDbPage, noContent); + rc = sqlite3PagerAcquire(pBt->pPager, pgno, (DbPage**)&pDbPage, flags); if( rc ) return rc; *ppPage = btreePageFromDbPage(pDbPage, pgno, pBt); return SQLITE_OK; @@ -50017,9 +51180,10 @@ SQLITE_PRIVATE u32 sqlite3BtreeLastPage(Btree *p){ ** may remain unchanged, or it may be set to an invalid value. */ static int getAndInitPage( - BtShared *pBt, /* The database file */ - Pgno pgno, /* Number of the page to get */ - MemPage **ppPage /* Write the page pointer here */ + BtShared *pBt, /* The database file */ + Pgno pgno, /* Number of the page to get */ + MemPage **ppPage, /* Write the page pointer here */ + int bReadonly /* True if a read-only (mmap) page is ok */ ){ int rc; assert( sqlite3_mutex_held(pBt->mutex) ); @@ -50027,7 +51191,7 @@ static int getAndInitPage( if( pgno>btreePagecount(pBt) ){ rc = SQLITE_CORRUPT_BKPT; }else{ - rc = btreeGetPage(pBt, pgno, ppPage, 0); + rc = btreeGetPage(pBt, pgno, ppPage, 0, bReadonly); if( rc==SQLITE_OK ){ rc = btreeInitPage(*ppPage); if( rc!=SQLITE_OK ){ @@ -50258,6 +51422,7 @@ SQLITE_PRIVATE int sqlite3BtreeOpen( rc = sqlite3PagerOpen(pVfs, &pBt->pPager, zFilename, EXTRA_SIZE, flags, vfsFlags, pageReinit); if( rc==SQLITE_OK ){ + sqlite3PagerSetMmapLimit(pBt->pPager, db->szMmap); rc = sqlite3PagerReadFileheader(pBt->pPager,sizeof(zDbHeader),zDbHeader); } if( rc!=SQLITE_OK ){ @@ -50524,6 +51689,19 @@ SQLITE_PRIVATE int sqlite3BtreeSetCacheSize(Btree *p, int mxPage){ return SQLITE_OK; } +/* +** Change the limit on the amount of the database file that may be +** memory mapped. 
+*/ +SQLITE_PRIVATE int sqlite3BtreeSetMmapLimit(Btree *p, sqlite3_int64 szMmap){ + BtShared *pBt = p->pBt; + assert( sqlite3_mutex_held(p->db->mutex) ); + sqlite3BtreeEnter(p); + sqlite3PagerSetMmapLimit(pBt->pPager, szMmap); + sqlite3BtreeLeave(p); + return SQLITE_OK; +} + /* ** Change the way data is synced to disk in order to increase or decrease ** how well the database resists damage due to OS crashes and power @@ -50749,7 +51927,7 @@ static int lockBtree(BtShared *pBt){ assert( pBt->pPage1==0 ); rc = sqlite3PagerSharedLock(pBt->pPager); if( rc!=SQLITE_OK ) return rc; - rc = btreeGetPage(pBt, 1, &pPage1, 0); + rc = btreeGetPage(pBt, 1, &pPage1, 0, 0); if( rc!=SQLITE_OK ) return rc; /* Do some checking to help insure the file we opened really is @@ -50885,6 +52063,29 @@ page1_init_failed: return rc; } +#ifndef NDEBUG +/* +** Return the number of cursors open on pBt. This is for use +** in assert() expressions, so it is only compiled if NDEBUG is not +** defined. +** +** Only write cursors are counted if wrOnly is true. If wrOnly is +** false then all cursors are counted. +** +** For the purposes of this routine, a cursor is any cursor that +** is capable of reading or writing to the databse. Cursors that +** have been tripped into the CURSOR_FAULT state are not counted. +*/ +static int countValidCursors(BtShared *pBt, int wrOnly){ + BtCursor *pCur; + int r = 0; + for(pCur=pBt->pCursor; pCur; pCur=pCur->pNext){ + if( (wrOnly==0 || pCur->wrFlag) && pCur->eState!=CURSOR_FAULT ) r++; + } + return r; +} +#endif + /* ** If there are no outstanding cursors and we are not in the middle ** of a transaction but there is a read lock on the database, then @@ -50895,7 +52096,7 @@ page1_init_failed: */ static void unlockBtreeIfUnused(BtShared *pBt){ assert( sqlite3_mutex_held(pBt->mutex) ); - assert( pBt->pCursor==0 || pBt->inTransaction>TRANS_NONE ); + assert( countValidCursors(pBt,0)==0 || pBt->inTransaction>TRANS_NONE ); if( pBt->inTransaction==TRANS_NONE && pBt->pPage1!=0 ){ assert( pBt->pPage1->aData ); assert( sqlite3PagerRefcount(pBt->pPager)==1 ); @@ -51308,7 +52509,7 @@ static int relocatePage( ** iPtrPage. 
 */
   if( eType!=PTRMAP_ROOTPAGE ){
-    rc = btreeGetPage(pBt, iPtrPage, &pPtrPage, 0);
+    rc = btreeGetPage(pBt, iPtrPage, &pPtrPage, 0, 0);
     if( rc!=SQLITE_OK ){
       return rc;
     }
@@ -51392,7 +52593,7 @@ static int incrVacuumStep(BtShared *pBt, Pgno nFin, Pgno iLastPg, int bCommit){
     u8 eMode = BTALLOC_ANY;   /* Mode parameter for allocateBtreePage() */
     Pgno iNear = 0;           /* nearby parameter for allocateBtreePage() */
-    rc = btreeGetPage(pBt, iLastPg, &pLastPg, 0);
+    rc = btreeGetPage(pBt, iLastPg, &pLastPg, 0, 0);
     if( rc!=SQLITE_OK ){
       return rc;
     }
@@ -51484,8 +52685,11 @@ SQLITE_PRIVATE int sqlite3BtreeIncrVacuum(Btree *p){
     if( nOrig<nFin ){
       rc = SQLITE_CORRUPT_BKPT;
     }else if( nFree>0 ){
-      invalidateAllOverflowCache(pBt);
-      rc = incrVacuumStep(pBt, nFin, nOrig, 0);
+      rc = saveAllCursors(pBt, 0, 0);
+      if( rc==SQLITE_OK ){
+        invalidateAllOverflowCache(pBt);
+        rc = incrVacuumStep(pBt, nFin, nOrig, 0);
+      }
       if( rc==SQLITE_OK ){
         rc = sqlite3PagerWrite(pBt->pPage1->pDbPage);
         put4byte(&pBt->pPage1->aData[28], pBt->nPage);
@@ -51533,7 +52737,9 @@ static int autoVacuumCommit(BtShared *pBt){
     nFree = get4byte(&pBt->pPage1->aData[36]);
     nFin = finalDbSize(pBt, nOrig, nFree);
     if( nFin>nOrig ) return SQLITE_CORRUPT_BKPT;
-
+    if( nFin<nOrig ){
+      rc = saveAllCursors(pBt, 0, 0);
+    }
     for(iFree=nOrig; iFree>nFin && rc==SQLITE_OK; iFree--){
       rc = incrVacuumStep(pBt, nFin, iFree, 1);
     }
@@ -51550,7 +52756,7 @@ static int autoVacuumCommit(BtShared *pBt){
       }
     }
-  assert( nRef==sqlite3PagerRefcount(pPager) );
+  assert( nRef>=sqlite3PagerRefcount(pPager) );
   return rc;
 }
@@ -51618,7 +52824,6 @@ static void btreeEndTransaction(Btree *p){
 #ifndef SQLITE_OMIT_AUTOVACUUM
   pBt->bDoTruncate = 0;
 #endif
-  btreeClearHasContent(pBt);
   if( p->inTrans>TRANS_NONE && p->db->activeVdbeCnt>1 ){
     /* If there are other active statements that belong to this database
     ** handle, downgrade to a read-only transaction. The other statements
@@ -51693,6 +52898,7 @@ SQLITE_PRIVATE int sqlite3BtreeCommitPhaseTwo(Btree *p, int bCleanup){
       return rc;
     }
     pBt->inTransaction = TRANS_READ;
+    btreeClearHasContent(pBt);
   }
   btreeEndTransaction(p);
@@ -51714,27 +52920,6 @@ SQLITE_PRIVATE int sqlite3BtreeCommit(Btree *p){
   return rc;
 }
-#ifndef NDEBUG
-/*
-** Return the number of write-cursors open on this handle. This is for use
-** in assert() expressions, so it is only compiled if NDEBUG is not
-** defined.
-**
-** For the purposes of this routine, a write-cursor is any cursor that
-** is capable of writing to the databse. That means the cursor was
-** originally opened for writing and the cursor has not be disabled
-** by having its state changed to CURSOR_FAULT.
-*/
-static int countWriteCursors(BtShared *pBt){
-  BtCursor *pCur;
-  int r = 0;
-  for(pCur=pBt->pCursor; pCur; pCur=pCur->pNext){
-    if( pCur->wrFlag && pCur->eState!=CURSOR_FAULT ) r++;
-  }
-  return r;
-}
-#endif
-
 /*
 ** This routine sets the state to CURSOR_FAULT and the error
 ** code to errCode for every cursor on BtShared that pBtree
@@ -51806,7 +52991,7 @@ SQLITE_PRIVATE int sqlite3BtreeRollback(Btree *p, int tripCode){
     /* The rollback may have destroyed the pPage1->aData value. So
     ** call btreeGetPage() on page 1 again to make
     ** sure pPage1->aData is set correctly. 
*/ - if( btreeGetPage(pBt, 1, &pPage1, 0)==SQLITE_OK ){ + if( btreeGetPage(pBt, 1, &pPage1, 0, 0)==SQLITE_OK ){ int nPage = get4byte(28+(u8*)pPage1->aData); testcase( nPage==0 ); if( nPage==0 ) sqlite3PagerPagecount(pBt->pPager, &nPage); @@ -51814,8 +52999,9 @@ SQLITE_PRIVATE int sqlite3BtreeRollback(Btree *p, int tripCode){ pBt->nPage = nPage; releasePage(pPage1); } - assert( countWriteCursors(pBt)==0 ); + assert( countValidCursors(pBt, 1)==0 ); pBt->inTransaction = TRANS_READ; + btreeClearHasContent(pBt); } btreeEndTransaction(p); @@ -52240,7 +53426,7 @@ static int getOverflowPage( assert( next==0 || rc==SQLITE_DONE ); if( rc==SQLITE_OK ){ - rc = btreeGetPage(pBt, ovfl, &pPage, 0); + rc = btreeGetPage(pBt, ovfl, &pPage, 0, (ppPage==0)); assert( rc==SQLITE_OK || pPage==0 ); if( rc==SQLITE_OK ){ next = get4byte(pPage->aData); @@ -52461,7 +53647,9 @@ static int accessPayload( { DbPage *pDbPage; - rc = sqlite3PagerGet(pBt->pPager, nextPage, &pDbPage); + rc = sqlite3PagerAcquire(pBt->pPager, nextPage, &pDbPage, + (eOp==0 ? PAGER_ACQUIRE_READONLY : 0) + ); if( rc==SQLITE_OK ){ aPayload = sqlite3PagerGetData(pDbPage); nextPage = get4byte(aPayload); @@ -52640,10 +53828,11 @@ static int moveToChild(BtCursor *pCur, u32 newPgno){ assert( cursorHoldsMutex(pCur) ); assert( pCur->eState==CURSOR_VALID ); assert( pCur->iPageiPage>=0 ); if( pCur->iPage>=(BTCURSOR_MAX_DEPTH-1) ){ return SQLITE_CORRUPT_BKPT; } - rc = getAndInitPage(pBt, newPgno, &pNewPage); + rc = getAndInitPage(pBt, newPgno, &pNewPage, (pCur->wrFlag==0)); if( rc ) return rc; pCur->apPage[i+1] = pNewPage; pCur->aiIdx[i+1] = 0; @@ -52760,7 +53949,7 @@ static int moveToRoot(BtCursor *pCur){ pCur->eState = CURSOR_INVALID; return SQLITE_OK; }else{ - rc = getAndInitPage(pBt, pCur->pgnoRoot, &pCur->apPage[0]); + rc = getAndInitPage(pBt, pCur->pgnoRoot, &pCur->apPage[0], pCur->wrFlag==0); if( rc!=SQLITE_OK ){ pCur->eState = CURSOR_INVALID; return rc; @@ -53374,7 +54563,7 @@ static int allocateBtreePage( if( iTrunk>mxPage ){ rc = SQLITE_CORRUPT_BKPT; }else{ - rc = btreeGetPage(pBt, iTrunk, &pTrunk, 0); + rc = btreeGetPage(pBt, iTrunk, &pTrunk, 0, 0); } if( rc ){ pTrunk = 0; @@ -53438,7 +54627,7 @@ static int allocateBtreePage( goto end_allocate_page; } testcase( iNewTrunk==mxPage ); - rc = btreeGetPage(pBt, iNewTrunk, &pNewTrunk, 0); + rc = btreeGetPage(pBt, iNewTrunk, &pNewTrunk, 0, 0); if( rc!=SQLITE_OK ){ goto end_allocate_page; } @@ -53518,7 +54707,7 @@ static int allocateBtreePage( } put4byte(&aData[4], k-1); noContent = !btreeGetHasContent(pBt, *pPgno); - rc = btreeGetPage(pBt, *pPgno, ppPage, noContent); + rc = btreeGetPage(pBt, *pPgno, ppPage, noContent, 0); if( rc==SQLITE_OK ){ rc = sqlite3PagerWrite((*ppPage)->pDbPage); if( rc!=SQLITE_OK ){ @@ -53566,7 +54755,7 @@ static int allocateBtreePage( MemPage *pPg = 0; TRACE(("ALLOCATE: %d from end of file (pointer-map page)\n", pBt->nPage)); assert( pBt->nPage!=PENDING_BYTE_PAGE(pBt) ); - rc = btreeGetPage(pBt, pBt->nPage, &pPg, bNoContent); + rc = btreeGetPage(pBt, pBt->nPage, &pPg, bNoContent, 0); if( rc==SQLITE_OK ){ rc = sqlite3PagerWrite(pPg->pDbPage); releasePage(pPg); @@ -53580,7 +54769,7 @@ static int allocateBtreePage( *pPgno = pBt->nPage; assert( *pPgno!=PENDING_BYTE_PAGE(pBt) ); - rc = btreeGetPage(pBt, *pPgno, ppPage, bNoContent); + rc = btreeGetPage(pBt, *pPgno, ppPage, bNoContent, 0); if( rc ) return rc; rc = sqlite3PagerWrite((*ppPage)->pDbPage); if( rc!=SQLITE_OK ){ @@ -53648,7 +54837,7 @@ static int freePage2(BtShared *pBt, MemPage *pMemPage, Pgno iPage){ /* If the 
secure_delete option is enabled, then ** always fully overwrite deleted information with zeros. */ - if( (!pPage && ((rc = btreeGetPage(pBt, iPage, &pPage, 0))!=0) ) + if( (!pPage && ((rc = btreeGetPage(pBt, iPage, &pPage, 0, 0))!=0) ) || ((rc = sqlite3PagerWrite(pPage->pDbPage))!=0) ){ goto freepage_out; @@ -53675,7 +54864,7 @@ static int freePage2(BtShared *pBt, MemPage *pMemPage, Pgno iPage){ u32 nLeaf; /* Initial number of leaf cells on trunk page */ iTrunk = get4byte(&pPage1->aData[32]); - rc = btreeGetPage(pBt, iTrunk, &pTrunk, 0); + rc = btreeGetPage(pBt, iTrunk, &pTrunk, 0, 0); if( rc!=SQLITE_OK ){ goto freepage_out; } @@ -53721,7 +54910,7 @@ static int freePage2(BtShared *pBt, MemPage *pMemPage, Pgno iPage){ ** first trunk in the free-list is full. Either way, the page being freed ** will become the new first trunk page in the free-list. */ - if( pPage==0 && SQLITE_OK!=(rc = btreeGetPage(pBt, iPage, &pPage, 0)) ){ + if( pPage==0 && SQLITE_OK!=(rc = btreeGetPage(pBt, iPage, &pPage, 0, 0)) ){ goto freepage_out; } rc = sqlite3PagerWrite(pPage->pDbPage); @@ -54522,7 +55711,7 @@ static int balance_nonroot( } pgno = get4byte(pRight); while( 1 ){ - rc = getAndInitPage(pBt, pgno, &apOld[i]); + rc = getAndInitPage(pBt, pgno, &apOld[i], 0); if( rc ){ memset(apOld, 0, (i+1)*sizeof(MemPage*)); goto balance_cleanup; @@ -55610,10 +56799,17 @@ static int btreeCreateTable(Btree *p, int *piTable, int createTabFlags){ u8 eType = 0; Pgno iPtrPage = 0; + /* Save the positions of any open cursors. This is required in + ** case they are holding a reference to an xFetch reference + ** corresponding to page pgnoRoot. */ + rc = saveAllCursors(pBt, 0, 0); releasePage(pPageMove); + if( rc!=SQLITE_OK ){ + return rc; + } /* Move the page currently at pgnoRoot to pgnoMove. 
*/ - rc = btreeGetPage(pBt, pgnoRoot, &pRoot, 0); + rc = btreeGetPage(pBt, pgnoRoot, &pRoot, 0, 0); if( rc!=SQLITE_OK ){ return rc; } @@ -55634,7 +56830,7 @@ static int btreeCreateTable(Btree *p, int *piTable, int createTabFlags){ if( rc!=SQLITE_OK ){ return rc; } - rc = btreeGetPage(pBt, pgnoRoot, &pRoot, 0); + rc = btreeGetPage(pBt, pgnoRoot, &pRoot, 0, 0); if( rc!=SQLITE_OK ){ return rc; } @@ -55710,7 +56906,7 @@ static int clearDatabasePage( return SQLITE_CORRUPT_BKPT; } - rc = getAndInitPage(pBt, pgno, &pPage); + rc = getAndInitPage(pBt, pgno, &pPage, 0); if( rc ) return rc; for(i=0; inCell; i++){ pCell = findCell(pPage, i); @@ -55812,7 +57008,7 @@ static int btreeDropTable(Btree *p, Pgno iTable, int *piMoved){ return SQLITE_LOCKED_SHAREDCACHE; } - rc = btreeGetPage(pBt, (Pgno)iTable, &pPage, 0); + rc = btreeGetPage(pBt, (Pgno)iTable, &pPage, 0, 0); if( rc ) return rc; rc = sqlite3BtreeClearTable(p, iTable, 0); if( rc ){ @@ -55847,7 +57043,7 @@ static int btreeDropTable(Btree *p, Pgno iTable, int *piMoved){ */ MemPage *pMove; releasePage(pPage); - rc = btreeGetPage(pBt, maxRootPgno, &pMove, 0); + rc = btreeGetPage(pBt, maxRootPgno, &pMove, 0, 0); if( rc!=SQLITE_OK ){ return rc; } @@ -55857,7 +57053,7 @@ static int btreeDropTable(Btree *p, Pgno iTable, int *piMoved){ return rc; } pMove = 0; - rc = btreeGetPage(pBt, maxRootPgno, &pMove, 0); + rc = btreeGetPage(pBt, maxRootPgno, &pMove, 0, 0); freePage(pMove, &rc); releasePage(pMove); if( rc!=SQLITE_OK ){ @@ -56269,7 +57465,7 @@ static int checkTreePage( usableSize = pBt->usableSize; if( iPage==0 ) return 0; if( checkRef(pCheck, iPage, zParentContext) ) return 0; - if( (rc = btreeGetPage(pBt, (Pgno)iPage, &pPage, 0))!=0 ){ + if( (rc = btreeGetPage(pBt, (Pgno)iPage, &pPage, 0, 0))!=0 ){ checkAppendMsg(pCheck, zContext, "unable to get the page. error code=%d", rc); return 0; @@ -56741,6 +57937,17 @@ SQLITE_PRIVATE int sqlite3BtreePutData(BtCursor *pCsr, u32 offset, u32 amt, void return SQLITE_ABORT; } + /* Save the positions of all other cursors open on this table. This is + ** required in case any of them are holding references to an xFetch + ** version of the b-tree page modified by the accessPayload call below. + ** + ** Note that pCsr must be open on a BTREE_INTKEY table and saveCursorPosition() + ** and hence saveAllCursors() cannot fail on a BTREE_INTKEY table, hence + ** saveAllCursors can only return SQLITE_OK. + */ + VVA_ONLY(rc =) saveAllCursors(pCsr->pBt, pCsr->pgnoRoot, pCsr); + assert( rc==SQLITE_OK ); + /* Check some assumptions: ** (a) the cursor is open for writing, ** (b) there is a read/write transaction open, @@ -57222,7 +58429,8 @@ SQLITE_API int sqlite3_backup_step(sqlite3_backup *p, int nPage){ const Pgno iSrcPg = p->iNext; /* Source page number */ if( iSrcPg!=PENDING_BYTE_PAGE(p->pSrc->pBt) ){ DbPage *pSrcPg; /* Source page object */ - rc = sqlite3PagerGet(pSrcPager, iSrcPg, &pSrcPg); + rc = sqlite3PagerAcquire(pSrcPager, iSrcPg, &pSrcPg, + PAGER_ACQUIRE_READONLY); if( rc==SQLITE_OK ){ rc = backupOnePage(p, iSrcPg, sqlite3PagerGetData(pSrcPg), 0); sqlite3PagerUnref(pSrcPg); @@ -62445,14 +63653,6 @@ end_of_step: return (rc&db->errMask); } -/* -** The maximum number of times that a statement will try to reparse -** itself before giving up and returning SQLITE_SCHEMA. -*/ -#ifndef SQLITE_MAX_SCHEMA_RETRY -# define SQLITE_MAX_SCHEMA_RETRY 5 -#endif - /* ** This is the top-level implementation of sqlite3_step(). Call ** sqlite3Step() to do most of the work. 
If a schema error occurs, @@ -63356,6 +64556,11 @@ static int findNextHostParameter(const char *zSql, int *pnToken){ ** then the returned string holds a copy of zRawSql with "-- " prepended ** to each line of text. ** +** If the SQLITE_TRACE_SIZE_LIMIT macro is defined to an integer, then +** then long strings and blobs are truncated to that many bytes. This +** can be used to prevent unreasonably large trace strings when dealing +** with large (multi-megabyte) strings and blobs. +** ** The calling function is responsible for making sure the memory returned ** is eventually freed. ** @@ -63426,30 +64631,49 @@ SQLITE_PRIVATE char *sqlite3VdbeExpandSql( }else if( pVar->flags & MEM_Real ){ sqlite3XPrintf(&out, "%!.15g", pVar->r); }else if( pVar->flags & MEM_Str ){ + int nOut; /* Number of bytes of the string text to include in output */ #ifndef SQLITE_OMIT_UTF16 u8 enc = ENC(db); + Mem utf8; if( enc!=SQLITE_UTF8 ){ - Mem utf8; memset(&utf8, 0, sizeof(utf8)); utf8.db = db; sqlite3VdbeMemSetStr(&utf8, pVar->z, pVar->n, enc, SQLITE_STATIC); sqlite3VdbeChangeEncoding(&utf8, SQLITE_UTF8); - sqlite3XPrintf(&out, "'%.*q'", utf8.n, utf8.z); - sqlite3VdbeMemRelease(&utf8); - }else -#endif - { - sqlite3XPrintf(&out, "'%.*q'", pVar->n, pVar->z); + pVar = &utf8; } +#endif + nOut = pVar->n; +#ifdef SQLITE_TRACE_SIZE_LIMIT + if( nOut>SQLITE_TRACE_SIZE_LIMIT ){ + nOut = SQLITE_TRACE_SIZE_LIMIT; + while( nOutn && (pVar->z[nOut]&0xc0)==0x80 ){ nOut++; } + } +#endif + sqlite3XPrintf(&out, "'%.*q'", nOut, pVar->z); +#ifdef SQLITE_TRACE_SIZE_LIMIT + if( nOutn ) sqlite3XPrintf(&out, "/*+%d bytes*/", pVar->n-nOut); +#endif +#ifndef SQLITE_OMIT_UTF16 + if( enc!=SQLITE_UTF8 ) sqlite3VdbeMemRelease(&utf8); +#endif }else if( pVar->flags & MEM_Zero ){ sqlite3XPrintf(&out, "zeroblob(%d)", pVar->u.nZero); }else{ + int nOut; /* Number of bytes of the blob to include in output */ assert( pVar->flags & MEM_Blob ); sqlite3StrAccumAppend(&out, "x'", 2); - for(i=0; in; i++){ + nOut = pVar->n; +#ifdef SQLITE_TRACE_SIZE_LIMIT + if( nOut>SQLITE_TRACE_SIZE_LIMIT ) nOut = SQLITE_TRACE_SIZE_LIMIT; +#endif + for(i=0; iz[i]&0xff); } sqlite3StrAccumAppend(&out, "'", 1); +#ifdef SQLITE_TRACE_SIZE_LIMIT + if( nOutn ) sqlite3XPrintf(&out, "/*+%d bytes*/", pVar->n-nOut); +#endif } } } @@ -67666,7 +68890,7 @@ case OP_SeekGt: { /* jump, in3 */ ** u.bc.r.flags = 0; ** } */ - u.bc.r.flags = (u16)(UNPACKED_INCRKEY * (1 & (u.bc.oc - OP_SeekLt))); + u.bc.r.flags = (u8)(UNPACKED_INCRKEY * (1 & (u.bc.oc - OP_SeekLt))); assert( u.bc.oc!=OP_SeekGt || u.bc.r.flags==UNPACKED_INCRKEY ); assert( u.bc.oc!=OP_SeekLe || u.bc.r.flags==UNPACKED_INCRKEY ); assert( u.bc.oc!=OP_SeekGe || u.bc.r.flags==0 ); @@ -70791,7 +72015,7 @@ SQLITE_API int sqlite3_blob_open( } sqlite3_bind_int64(pBlob->pStmt, 1, iRow); rc = blobSeekToRow(pBlob, iRow, &zErr); - } while( (++nAttempt)<5 && rc==SQLITE_SCHEMA ); + } while( (++nAttempt)mallocFailed==0 ){ @@ -72476,7 +73700,9 @@ static const struct sqlite3_io_methods MemJournalMethods = { 0, /* xShmMap */ 0, /* xShmLock */ 0, /* xShmBarrier */ - 0 /* xShmUnlock */ + 0, /* xShmUnmap */ + 0, /* xFetch */ + 0 /* xUnfetch */ }; /* @@ -72620,7 +73846,9 @@ SQLITE_PRIVATE int sqlite3WalkSelectFrom(Walker *pWalker, Select *p){ /* ** Call sqlite3WalkExpr() for every expression in Select statement p. ** Invoke sqlite3WalkSelect() for subqueries in the FROM clause and -** on the compound select chain, p->pPrior. +** on the compound select chain, p->pPrior. 
Invoke the xSelectCallback() +** either before or after the walk of expressions and FROM clause, depending +** on whether pWalker->bSelectDepthFirst is false or true, respectively. ** ** Return WRC_Continue under normal conditions. Return WRC_Abort if ** there is an abort request. @@ -72634,14 +73862,23 @@ SQLITE_PRIVATE int sqlite3WalkSelect(Walker *pWalker, Select *p){ rc = WRC_Continue; pWalker->walkerDepth++; while( p ){ - rc = pWalker->xSelectCallback(pWalker, p); - if( rc ) break; + if( !pWalker->bSelectDepthFirst ){ + rc = pWalker->xSelectCallback(pWalker, p); + if( rc ) break; + } if( sqlite3WalkSelectExpr(pWalker, p) || sqlite3WalkSelectFrom(pWalker, p) ){ pWalker->walkerDepth--; return WRC_Abort; } + if( pWalker->bSelectDepthFirst ){ + rc = pWalker->xSelectCallback(pWalker, p); + /* Depth-first search is currently only used for + ** selectAddSubqueryTypeInfo() and that routine always returns + ** WRC_Continue (0). So the following branch is never taken. */ + if( NEVER(rc) ) break; + } p = p->pPrior; } pWalker->walkerDepth--; @@ -73039,7 +74276,10 @@ static int lookupName( ** Note that the expression in the result set should have already been ** resolved by the time the WHERE clause is resolved. */ - if( cnt==0 && (pEList = pNC->pEList)!=0 && zTab==0 ){ + if( (pEList = pNC->pEList)!=0 + && zTab==0 + && ((pNC->ncFlags & NC_AsMaybe)==0 || cnt==0) + ){ for(j=0; jnExpr; j++){ char *zAs = pEList->a[j].zName; if( zAs!=0 && sqlite3StrICmp(zAs, zCol)==0 ){ @@ -73130,7 +74370,9 @@ static int lookupName( lookupname_end: if( cnt==1 ){ assert( pNC!=0 ); - sqlite3AuthRead(pParse, pExpr, pSchema, pNC->pSrcList); + if( pExpr->op!=TK_AS ){ + sqlite3AuthRead(pParse, pExpr, pSchema, pNC->pSrcList); + } /* Increment the nRef value on all name contexts from TopNC up to ** the point where the name matched. */ for(;;){ @@ -73805,11 +75047,10 @@ static int resolveSelectStep(Walker *pWalker, Select *p){ ** re-evaluated for each reference to it. 
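    ** (With the NC_AsMaybe flag set for the pass below, a result-set alias is
    ** only used as a fallback when the name does not otherwise resolve to a
    ** real column of a table in the FROM clause.)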
*/ sNC.pEList = p->pEList; - if( sqlite3ResolveExprNames(&sNC, p->pWhere) || - sqlite3ResolveExprNames(&sNC, p->pHaving) - ){ - return WRC_Abort; - } + sNC.ncFlags |= NC_AsMaybe; + if( sqlite3ResolveExprNames(&sNC, p->pHaving) ) return WRC_Abort; + if( sqlite3ResolveExprNames(&sNC, p->pWhere) ) return WRC_Abort; + sNC.ncFlags &= ~NC_AsMaybe; /* The ORDER BY and GROUP BY clauses may not refer to terms in ** outer queries @@ -73930,6 +75171,7 @@ SQLITE_PRIVATE int sqlite3ResolveExprNames( #endif savedHasAgg = pNC->ncFlags & NC_HasAgg; pNC->ncFlags &= ~NC_HasAgg; + memset(&w, 0, sizeof(w)); w.xExprCallback = resolveExprStep; w.xSelectCallback = resolveSelectStep; w.pParse = pNC->pParse; @@ -73970,6 +75212,7 @@ SQLITE_PRIVATE void sqlite3ResolveSelectNames( Walker w; assert( p!=0 ); + memset(&w, 0, sizeof(w)); w.xExprCallback = resolveExprStep; w.xSelectCallback = resolveSelectStep; w.pParse = pParse; @@ -74096,12 +75339,7 @@ SQLITE_PRIVATE CollSeq *sqlite3ExprCollSeq(Parse *pParse, Expr *pExpr){ } assert( op!=TK_REGISTER || p->op2!=TK_COLLATE ); if( op==TK_COLLATE ){ - if( db->init.busy ){ - /* Do not report errors when parsing while the schema */ - pColl = sqlite3FindCollSeq(db, ENC(db), p->u.zToken, 0); - }else{ - pColl = sqlite3GetCollSeq(pParse, ENC(db), 0, p->u.zToken); - } + pColl = sqlite3GetCollSeq(pParse, ENC(db), 0, p->u.zToken); break; } if( p->pTab!=0 @@ -75194,6 +76432,7 @@ static int selectNodeIsConstant(Walker *pWalker, Select *NotUsed){ } static int exprIsConst(Expr *p, int initFlag){ Walker w; + memset(&w, 0, sizeof(w)); w.u.i = initFlag; w.xExprCallback = exprNodeIsConstant; w.xSelectCallback = selectNodeIsConstant; @@ -77408,8 +78647,8 @@ SQLITE_PRIVATE void sqlite3ExprCodeConstants(Parse *pParse, Expr *pExpr){ Walker w; if( pParse->cookieGoto ) return; if( OptimizationDisabled(pParse->db, SQLITE_FactorOutConst) ) return; + memset(&w, 0, sizeof(w)); w.xExprCallback = evalConstExpr; - w.xSelectCallback = 0; w.pParse = pParse; sqlite3WalkExpr(&w, pExpr); } @@ -83601,10 +84840,8 @@ SQLITE_PRIVATE Index *sqlite3CreateIndex( for(i=0; inExpr; i++){ Expr *pExpr = pList->a[i].pExpr; if( pExpr ){ - CollSeq *pColl = sqlite3ExprCollSeq(pParse, pExpr); - if( pColl ){ - nExtra += (1 + sqlite3Strlen30(pColl->zName)); - } + assert( pExpr->op==TK_COLLATE ); + nExtra += (1 + sqlite3Strlen30(pExpr->u.zToken)); } } @@ -83665,7 +84902,6 @@ SQLITE_PRIVATE Index *sqlite3CreateIndex( const char *zColName = pListItem->zName; Column *pTabCol; int requestedSortOrder; - CollSeq *pColl; /* Collating sequence */ char *zColl; /* Collation sequence name */ for(j=0, pTabCol=pTab->aCol; jnCol; j++, pTabCol++){ @@ -83678,11 +84914,10 @@ SQLITE_PRIVATE Index *sqlite3CreateIndex( goto exit_create_index; } pIndex->aiColumn[i] = j; - if( pListItem->pExpr - && (pColl = sqlite3ExprCollSeq(pParse, pListItem->pExpr))!=0 - ){ + if( pListItem->pExpr ){ int nColl; - zColl = pColl->zName; + assert( pListItem->pExpr->op==TK_COLLATE ); + zColl = pListItem->pExpr->u.zToken; nColl = sqlite3Strlen30(zColl) + 1; assert( nExtra>=nColl ); memcpy(zExtra, zColl, nColl); @@ -83691,9 +84926,7 @@ SQLITE_PRIVATE Index *sqlite3CreateIndex( nExtra -= nColl; }else{ zColl = pTab->aCol[j].zColl; - if( !zColl ){ - zColl = "BINARY"; - } + if( !zColl ) zColl = "BINARY"; } if( !db->init.busy && !sqlite3LocateCollSeq(pParse, zColl) ){ goto exit_create_index; @@ -86612,6 +87845,13 @@ static int patternCompare( return *zString==0; } +/* +** The sqlite3_strglob() interface. 
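+**
+** A brief usage sketch (illustrative only; zFilename is just a placeholder):
+** the return value is zero when the string matches the GLOB pattern and
+** non-zero otherwise, so a caller might write
+**
+**      if( sqlite3_strglob("*.db", zFilename)==0 ){ ... }
+**
+** Note that GLOB matching, unlike the default LIKE operator, is
+** case-sensitive.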
+*/ +SQLITE_API int sqlite3_strglob(const char *zGlobPattern, const char *zString){ + return patternCompare((u8*)zGlobPattern, (u8*)zString, &globInfo, 0)==0; +} + /* ** Count the number of times that the LIKE operator (or GLOB which is ** just a variation of LIKE) gets called. This is used for testing @@ -90812,7 +92052,6 @@ SQLITE_API int sqlite3_exec( const char *zLeftover; /* Tail of unprocessed SQL */ sqlite3_stmt *pStmt = 0; /* The current SQL statement */ char **azCols = 0; /* Names of result columns */ - int nRetry = 0; /* Number of retry attempts */ int callbackIsInit; /* True if callback data is initialized */ if( !sqlite3SafetyCheckOk(db) ) return SQLITE_MISUSE_BKPT; @@ -90820,12 +92059,12 @@ SQLITE_API int sqlite3_exec( sqlite3_mutex_enter(db->mutex); sqlite3Error(db, SQLITE_OK, 0); - while( (rc==SQLITE_OK || (rc==SQLITE_SCHEMA && (++nRetry)<2)) && zSql[0] ){ + while( rc==SQLITE_OK && zSql[0] ){ int nCol; char **azVals = 0; pStmt = 0; - rc = sqlite3_prepare(db, zSql, -1, &pStmt, &zLeftover); + rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, &zLeftover); assert( rc==SQLITE_OK || pStmt==0 ); if( rc!=SQLITE_OK ){ continue; @@ -90882,11 +92121,8 @@ SQLITE_API int sqlite3_exec( if( rc!=SQLITE_ROW ){ rc = sqlite3VdbeFinalize((Vdbe *)pStmt); pStmt = 0; - if( rc!=SQLITE_SCHEMA ){ - nRetry = 0; - zSql = zLeftover; - while( sqlite3Isspace(zSql[0]) ) zSql++; - } + zSql = zLeftover; + while( sqlite3Isspace(zSql[0]) ) zSql++; break; } } @@ -91410,8 +92646,17 @@ struct sqlite3_api_routines { #define sqlite3_wal_checkpoint_v2 sqlite3_api->wal_checkpoint_v2 #endif /* SQLITE_CORE */ -#define SQLITE_EXTENSION_INIT1 const sqlite3_api_routines *sqlite3_api = 0; -#define SQLITE_EXTENSION_INIT2(v) sqlite3_api = v; +#ifndef SQLITE_CORE + /* This case when the file really is being compiled as a loadable + ** extension */ +# define SQLITE_EXTENSION_INIT1 const sqlite3_api_routines *sqlite3_api=0; +# define SQLITE_EXTENSION_INIT2(v) sqlite3_api=v; +#else + /* This case when the file is being statically linked into the + ** application */ +# define SQLITE_EXTENSION_INIT1 /*no-op*/ +# define SQLITE_EXTENSION_INIT2(v) (void)v; /* unused parameter */ +#endif #endif /* _SQLITE3EXT_H_ */ @@ -91814,8 +93059,23 @@ static int sqlite3LoadExtension( void *handle; int (*xInit)(sqlite3*,char**,const sqlite3_api_routines*); char *zErrmsg = 0; + const char *zEntry; + char *zAltEntry = 0; void **aHandle; int nMsg = 300 + sqlite3Strlen30(zFile); + int ii; + + /* Shared library endings to try if zFile cannot be loaded as written */ + static const char *azEndings[] = { +#if SQLITE_OS_WIN + "dll" +#elif defined(__APPLE__) + "dylib" +#else + "so" +#endif + }; + if( pzErrMsg ) *pzErrMsg = 0; @@ -91832,11 +93092,17 @@ static int sqlite3LoadExtension( return SQLITE_ERROR; } - if( zProc==0 ){ - zProc = "sqlite3_extension_init"; - } + zEntry = zProc ? 
zProc : "sqlite3_extension_init"; handle = sqlite3OsDlOpen(pVfs, zFile); +#if SQLITE_OS_UNIX || SQLITE_OS_WIN + for(ii=0; ii sqlite3_example_init + ** C:/lib/mathfuncs.dll ==> sqlite3_mathfuncs_init + */ + if( xInit==0 && zProc==0 ){ + int iFile, iEntry, c; + int ncFile = sqlite3Strlen30(zFile); + zAltEntry = sqlite3_malloc(ncFile+30); + if( zAltEntry==0 ){ + sqlite3OsDlClose(pVfs, handle); + return SQLITE_NOMEM; + } + memcpy(zAltEntry, "sqlite3_", 8); + for(iFile=ncFile-1; iFile>=0 && zFile[iFile]!='/'; iFile--){} + iFile++; + if( sqlite3_strnicmp(zFile+iFile, "lib", 3)==0 ) iFile += 3; + for(iEntry=8; (c = zFile[iFile])!=0 && c!='.'; iFile++){ + if( sqlite3Isalpha(c) ){ + zAltEntry[iEntry++] = (char)sqlite3UpperToLower[(unsigned)c]; + } + } + memcpy(zAltEntry+iEntry, "_init", 6); + zEntry = zAltEntry; + xInit = (int(*)(sqlite3*,char**,const sqlite3_api_routines*)) + sqlite3OsDlSym(pVfs, handle, zEntry); + } if( xInit==0 ){ if( pzErrMsg ){ - nMsg += sqlite3Strlen30(zProc); + nMsg += sqlite3Strlen30(zEntry); *pzErrMsg = zErrmsg = sqlite3_malloc(nMsg); if( zErrmsg ){ sqlite3_snprintf(nMsg, zErrmsg, - "no entry point [%s] in shared library [%s]", zProc,zFile); + "no entry point [%s] in shared library [%s]", zEntry, zFile); sqlite3OsDlError(pVfs, nMsg-1, zErrmsg); } - sqlite3OsDlClose(pVfs, handle); } + sqlite3OsDlClose(pVfs, handle); + sqlite3_free(zAltEntry); return SQLITE_ERROR; - }else if( xInit(db, &zErrmsg, &sqlite3Apis) ){ + } + sqlite3_free(zAltEntry); + if( xInit(db, &zErrmsg, &sqlite3Apis) ){ if( pzErrMsg ){ *pzErrMsg = sqlite3_mprintf("error during initialization: %s", zErrmsg); } @@ -92391,7 +93694,7 @@ SQLITE_PRIVATE void sqlite3Pragma( int rc; /* return value form SQLITE_FCNTL_PRAGMA */ sqlite3 *db = pParse->db; /* The database connection */ Db *pDb; /* The specific database being pragmaed */ - Vdbe *v = pParse->pVdbe = sqlite3VdbeCreate(db); /* Prepared statement */ + Vdbe *v = sqlite3GetVdbe(pParse); /* Prepared statement */ if( v==0 ) return; sqlite3VdbeRunOnlyOnce(v); @@ -92474,11 +93777,12 @@ SQLITE_PRIVATE void sqlite3Pragma( static const VdbeOpList getCacheSize[] = { { OP_Transaction, 0, 0, 0}, /* 0 */ { OP_ReadCookie, 0, 1, BTREE_DEFAULT_CACHE_SIZE}, /* 1 */ - { OP_IfPos, 1, 7, 0}, + { OP_IfPos, 1, 8, 0}, { OP_Integer, 0, 2, 0}, { OP_Subtract, 1, 2, 1}, - { OP_IfPos, 1, 7, 0}, + { OP_IfPos, 1, 8, 0}, { OP_Integer, 0, 1, 0}, /* 6 */ + { OP_Noop, 0, 0, 0}, { OP_ResultRow, 1, 1, 0}, }; int addr; @@ -92816,6 +94120,43 @@ SQLITE_PRIVATE void sqlite3Pragma( } }else + /* + ** PRAGMA [database.]mmap_size(N) + ** + ** Used to set mapping size limit. The mapping size limit is + ** used to limit the aggregate size of all memory mapped regions of the + ** database file. If this parameter is set to zero, then memory mapping + ** is not used at all. If N is negative, then the default memory map + ** limit determined by sqlite3_config(SQLITE_CONFIG_MMAP_SIZE) is set. + ** The parameter N is measured in bytes. + ** + ** This value is advisory. The underlying VFS is free to memory map + ** as little or as much as it wants. Except, if N is set to 0 then the + ** upper layers will never invoke the xFetch interfaces to the VFS. 
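+  **
+  ** A minimal usage sketch (the byte count below is an arbitrary example):
+  **
+  **    PRAGMA main.mmap_size=268435456;   -- request a 256MiB mapping limit
+  **    PRAGMA mmap_size;                  -- report the limit now in effect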
+ */ + if( sqlite3StrICmp(zLeft,"mmap_size")==0 ){ + sqlite3_int64 sz; + assert( sqlite3SchemaMutexHeld(db, iDb, 0) ); + if( zRight ){ + int ii; + sqlite3Atoi64(zRight, &sz, 1000, SQLITE_UTF8); + if( sz<0 ) sz = sqlite3GlobalConfig.szMmap; + if( pId2->n==0 ) db->szMmap = sz; + for(ii=db->nDb-1; ii>=0; ii--){ + if( db->aDb[ii].pBt && (ii==iDb || pId2->n==0) ){ + sqlite3BtreeSetMmapLimit(db->aDb[ii].pBt, sz); + } + } + } + sz = -1; + if( sqlite3_file_control(db,zDb,SQLITE_FCNTL_MMAP_SIZE,&sz)==SQLITE_OK ){ +#if SQLITE_MAX_MMAP_SIZE==0 + sz = 0; +#endif + returnSingleInt(pParse, "mmap_size", sz); + } + }else + /* ** PRAGMA temp_store ** PRAGMA temp_store = "default"|"memory"|"file" @@ -93601,6 +94942,11 @@ SQLITE_PRIVATE void sqlite3Pragma( ** PRAGMA [database.]user_version ** PRAGMA [database.]user_version = ** + ** PRAGMA [database.]freelist_count = + ** + ** PRAGMA [database.]application_id + ** PRAGMA [database.]application_id = + ** ** The pragma's schema_version and user_version are used to set or get ** the value of the schema-version and user-version, respectively. Both ** the schema-version and the user-version are 32-bit signed integers @@ -93622,10 +94968,14 @@ SQLITE_PRIVATE void sqlite3Pragma( if( sqlite3StrICmp(zLeft, "schema_version")==0 || sqlite3StrICmp(zLeft, "user_version")==0 || sqlite3StrICmp(zLeft, "freelist_count")==0 + || sqlite3StrICmp(zLeft, "application_id")==0 ){ int iCookie; /* Cookie index. 1 for schema-cookie, 6 for user-cookie. */ sqlite3VdbeUsesBtree(v, iDb); switch( zLeft[0] ){ + case 'a': case 'A': + iCookie = BTREE_APPLICATION_ID; + break; case 'f': case 'F': iCookie = BTREE_FREE_PAGE_COUNT; break; @@ -94506,7 +95856,6 @@ static int sqlite3Prepare( } #endif - assert( db->init.busy==0 || saveSqlFlag==0 ); if( db->init.busy==0 ){ Vdbe *pVdbe = pParse->pVdbe; sqlite3VdbeSetSql(pVdbe, zSql, (int)(pParse->zTail-zSql), saveSqlFlag); @@ -97982,6 +99331,69 @@ SQLITE_PRIVATE int sqlite3IndexedByLookup(Parse *pParse, struct SrcList_item *pF } return SQLITE_OK; } +/* +** Detect compound SELECT statements that use an ORDER BY clause with +** an alternative collating sequence. +** +** SELECT ... FROM t1 EXCEPT SELECT ... FROM t2 ORDER BY .. COLLATE ... +** +** These are rewritten as a subquery: +** +** SELECT * FROM (SELECT ... FROM t1 EXCEPT SELECT ... FROM t2) +** ORDER BY ... COLLATE ... +** +** This transformation is necessary because the multiSelectOrderBy() routine +** above that generates the code for a compound SELECT with an ORDER BY clause +** uses a merge algorithm that requires the same collating sequence on the +** result columns as on the ORDER BY clause. See ticket +** http://www.sqlite.org/src/info/6709574d2a +** +** This transformation is only needed for EXCEPT, INTERSECT, and UNION. +** The UNION ALL operator works fine with multiSelectOrderBy() even when +** there are COLLATE terms in the ORDER BY. +*/ +static int convertCompoundSelectToSubquery(Walker *pWalker, Select *p){ + int i; + Select *pNew; + Select *pX; + sqlite3 *db; + struct ExprList_item *a; + SrcList *pNewSrc; + Parse *pParse; + Token dummy; + + if( p->pPrior==0 ) return WRC_Continue; + if( p->pOrderBy==0 ) return WRC_Continue; + for(pX=p; pX && (pX->op==TK_ALL || pX->op==TK_SELECT); pX=pX->pPrior){} + if( pX==0 ) return WRC_Continue; + a = p->pOrderBy->a; + for(i=p->pOrderBy->nExpr-1; i>=0; i--){ + if( a[i].pExpr->flags & EP_Collate ) break; + } + if( i<0 ) return WRC_Continue; + + /* If we reach this point, that means the transformation is required. 
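+  ** A new Select node is allocated to take over the original compound body,
+  ** while the existing node p is turned into a plain
+  ** "SELECT * FROM (<compound>) ORDER BY ..." wrapper around it, so the
+  ** collation-sensitive sort runs outside of the merge logic in
+  ** multiSelectOrderBy().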
*/ + + pParse = pWalker->pParse; + db = pParse->db; + pNew = sqlite3DbMallocZero(db, sizeof(*pNew) ); + if( pNew==0 ) return WRC_Abort; + memset(&dummy, 0, sizeof(dummy)); + pNewSrc = sqlite3SrcListAppendFromTerm(pParse,0,0,0,&dummy,pNew,0,0); + if( pNewSrc==0 ) return WRC_Abort; + *pNew = *p; + p->pSrc = pNewSrc; + p->pEList = sqlite3ExprListAppend(pParse, 0, sqlite3Expr(db, TK_ALL, 0)); + p->op = TK_SELECT; + p->pWhere = 0; + pNew->pGroupBy = 0; + pNew->pHaving = 0; + pNew->pOrderBy = 0; + p->pPrior = 0; + pNew->pLimit = 0; + pNew->pOffset = 0; + return WRC_Continue; +} /* ** This routine is a Walker callback for "expanding" a SELECT statement. @@ -98298,10 +99710,13 @@ static int exprWalkNoop(Walker *NotUsed, Expr *NotUsed2){ */ static void sqlite3SelectExpand(Parse *pParse, Select *pSelect){ Walker w; - w.xSelectCallback = selectExpander; + memset(&w, 0, sizeof(w)); + w.xSelectCallback = convertCompoundSelectToSubquery; w.xExprCallback = exprWalkNoop; w.pParse = pParse; sqlite3WalkSelect(&w, pSelect); + w.xSelectCallback = selectExpander; + sqlite3WalkSelect(&w, pSelect); } @@ -98356,9 +99771,11 @@ static int selectAddSubqueryTypeInfo(Walker *pWalker, Select *p){ static void sqlite3SelectAddTypeInfo(Parse *pParse, Select *pSelect){ #ifndef SQLITE_OMIT_SUBQUERY Walker w; + memset(&w, 0, sizeof(w)); w.xSelectCallback = selectAddSubqueryTypeInfo; w.xExprCallback = exprWalkNoop; w.pParse = pParse; + w.bSelectDepthFirst = 1; sqlite3WalkSelect(&w, pSelect); #endif } @@ -98769,7 +100186,7 @@ SQLITE_PRIVATE int sqlite3Select( pItem->addrFillSub = topAddr+1; VdbeNoopComment((v, "materialize %s", pItem->pTab->zName)); if( pItem->isCorrelated==0 ){ - /* If the subquery is no correlated and if we are not inside of + /* If the subquery is not correlated and if we are not inside of ** a trigger, then we only need to compute the value of the subquery ** once. */ onceAddr = sqlite3CodeOnce(pParse); @@ -101035,6 +102452,7 @@ SQLITE_PRIVATE void sqlite3Update( } if( j>=pTab->nCol ){ if( sqlite3IsRowid(pChanges->a[i].zName) ){ + j = -1; chngRowid = 1; pRowidExpr = pChanges->a[i].pExpr; }else{ @@ -101047,7 +102465,8 @@ SQLITE_PRIVATE void sqlite3Update( { int rc; rc = sqlite3AuthCheck(pParse, SQLITE_UPDATE, pTab->zName, - pTab->aCol[j].zName, db->aDb[iDb].zName); + j<0 ? 
"ROWID" : pTab->aCol[j].zName, + db->aDb[iDb].zName); if( rc==SQLITE_DENY ){ goto update_cleanup; }else if( rc==SQLITE_IGNORE ){ @@ -101790,6 +103209,7 @@ SQLITE_PRIVATE int sqlite3RunVacuum(char **pzErrMsg, sqlite3 *db){ BTREE_DEFAULT_CACHE_SIZE, 0, /* Preserve the default page cache size */ BTREE_TEXT_ENCODING, 0, /* Preserve the text encoding */ BTREE_USER_VERSION, 0, /* Preserve the user version */ + BTREE_APPLICATION_ID, 0, /* Preserve the application id */ }; assert( 1==sqlite3BtreeIsInTrans(pTemp) ); @@ -103657,7 +105077,7 @@ static WhereTerm *findTerm( continue; } } - if( pTerm->prereqRight==0 ){ + if( pTerm->prereqRight==0 && (pTerm->eOperator&WO_EQ)!=0 ){ pResult = pTerm; goto findTerm_success; }else if( pResult==0 ){ @@ -105227,9 +106647,8 @@ static void bestVirtualIndex(WhereBestIdx *p){ struct sqlite3_index_constraint *pIdxCons; struct sqlite3_index_constraint_usage *pUsage; WhereTerm *pTerm; - int i, j, k; + int i, j; int nOrderBy; - int sortOrder; /* Sort order for IN clauses */ int bAllowIN; /* Allow IN optimizations */ double rCost; @@ -105328,7 +106747,6 @@ static void bestVirtualIndex(WhereBestIdx *p){ return; } - sortOrder = SQLITE_SO_ASC; pIdxCons = *(struct sqlite3_index_constraint**)&pIdxInfo->aConstraint; for(i=0; inConstraint; i++, pIdxCons++){ if( pUsage[i].argvIndex>0 ){ @@ -105343,17 +106761,28 @@ static void bestVirtualIndex(WhereBestIdx *p){ ** repeated in the output. */ break; } - for(k=0; knOrderBy; k++){ - if( pIdxInfo->aOrderBy[k].iColumn==pIdxCons->iColumn ){ - sortOrder = pIdxInfo->aOrderBy[k].desc; - break; - } - } + /* A virtual table that is constrained by an IN clause may not + ** consume the ORDER BY clause because (1) the order of IN terms + ** is not necessarily related to the order of output terms and + ** (2) Multiple outputs from a single IN value will not merge + ** together. */ + pIdxInfo->orderByConsumed = 0; } } } if( i>=pIdxInfo->nConstraint ) break; } + + /* The orderByConsumed signal is only valid if all outer loops collectively + ** generate just a single row of output. + */ + if( pIdxInfo->orderByConsumed ){ + for(i=0; ii; i++){ + if( (p->aLevel[i].plan.wsFlags & WHERE_UNIQUE)==0 ){ + pIdxInfo->orderByConsumed = 0; + } + } + } /* If there is an ORDER BY clause, and the selected virtual table index ** does not satisfy it, increase the cost of the scan accordingly. This @@ -105378,8 +106807,7 @@ static void bestVirtualIndex(WhereBestIdx *p){ } p->cost.plan.u.pVtabIdx = pIdxInfo; if( pIdxInfo->orderByConsumed ){ - assert( sortOrder==0 || sortOrder==1 ); - p->cost.plan.wsFlags |= WHERE_ORDERED + sortOrder*WHERE_REVERSE; + p->cost.plan.wsFlags |= WHERE_ORDERED; p->cost.plan.nOBSat = nOrderBy; }else{ p->cost.plan.nOBSat = p->i ? p->aLevel[p->i-1].plan.nOBSat : 0; @@ -107116,6 +108544,7 @@ static Bitmask codeOneLoopStart( int addrCont; /* Jump here to continue with next cycle */ int iRowidReg = 0; /* Rowid is stored in this register, if not zero */ int iReleaseReg = 0; /* Temp register to free before returning */ + Bitmask newNotReady; /* Return value */ pParse = pWInfo->pParse; v = pParse->pVdbe; @@ -107126,6 +108555,7 @@ static Bitmask codeOneLoopStart( bRev = (pLevel->plan.wsFlags & WHERE_REVERSE)!=0; omitTable = (pLevel->plan.wsFlags & WHERE_IDX_ONLY)!=0 && (wctrlFlags & WHERE_FORCE_TABLE)==0; + VdbeNoopComment((v, "Begin Join Loop %d", iLevel)); /* Create labels for the "break" and "continue" instructions ** for the current loop. Jump to addrBrk to break out of a loop. 
@@ -107668,6 +109098,10 @@ static Bitmask codeOneLoopStart( ** the "interesting" terms of z - terms that did not originate in the ** ON or USING clause of a LEFT JOIN, and terms that are usable as ** indices. + ** + ** This optimization also only applies if the (x1 OR x2 OR ...) term + ** is not contained in the ON clause of a LEFT JOIN. + ** See ticket http://www.sqlite.org/src/info/f2369304e4 */ if( pWC->nTerm>1 ){ int iTerm; @@ -107689,7 +109123,7 @@ static Bitmask codeOneLoopStart( if( pOrTerm->leftCursor==iCur || (pOrTerm->eOperator & WO_AND)!=0 ){ WhereInfo *pSubWInfo; /* Info for single OR-term scan */ Expr *pOrExpr = pOrTerm->pExpr; - if( pAndExpr ){ + if( pAndExpr && !ExprHasProperty(pOrExpr, EP_FromJoin) ){ pAndExpr->pLeft = pOrExpr; pOrExpr = pAndExpr; } @@ -107776,7 +109210,7 @@ static Bitmask codeOneLoopStart( pLevel->p2 = 1 + sqlite3VdbeAddOp2(v, aStart[bRev], iCur, addrBrk); pLevel->p5 = SQLITE_STMTSTATUS_FULLSCAN_STEP; } - notReady &= ~getMask(pWC->pMaskSet, iCur); + newNotReady = notReady & ~getMask(pWC->pMaskSet, iCur); /* Insert code to test every subexpression that can be completely ** computed using the current set of tables. @@ -107790,7 +109224,7 @@ static Bitmask codeOneLoopStart( testcase( pTerm->wtFlags & TERM_VIRTUAL ); /* IMP: R-30575-11662 */ testcase( pTerm->wtFlags & TERM_CODED ); if( pTerm->wtFlags & (TERM_VIRTUAL|TERM_CODED) ) continue; - if( (pTerm->prereqAll & notReady)!=0 ){ + if( (pTerm->prereqAll & newNotReady)!=0 ){ testcase( pWInfo->untestedTerms==0 && (pWInfo->wctrlFlags & WHERE_ONETABLE_ONLY)!=0 ); pWInfo->untestedTerms = 1; @@ -107805,6 +109239,33 @@ static Bitmask codeOneLoopStart( pTerm->wtFlags |= TERM_CODED; } + /* Insert code to test for implied constraints based on transitivity + ** of the "==" operator. + ** + ** Example: If the WHERE clause contains "t1.a=t2.b" and "t2.b=123" + ** and we are coding the t1 loop and the t2 loop has not yet coded, + ** then we cannot use the "t1.a=t2.b" constraint, but we can code + ** the implied "t1.a=123" constraint. + */ + for(pTerm=pWC->a, j=pWC->nTerm; j>0; j--, pTerm++){ + Expr *pE; + WhereTerm *pAlt; + Expr sEq; + if( pTerm->wtFlags & (TERM_VIRTUAL|TERM_CODED) ) continue; + if( pTerm->eOperator!=(WO_EQUIV|WO_EQ) ) continue; + if( pTerm->leftCursor!=iCur ) continue; + pE = pTerm->pExpr; + assert( !ExprHasProperty(pE, EP_FromJoin) ); + assert( (pTerm->prereqRight & newNotReady)!=0 ); + pAlt = findTerm(pWC, iCur, pTerm->u.leftColumn, notReady, WO_EQ|WO_IN, 0); + if( pAlt==0 ) continue; + if( pAlt->wtFlags & (TERM_CODED) ) continue; + VdbeNoopComment((v, "begin transitive constraint")); + sEq = *pAlt->pExpr; + sEq.pLeft = pE->pLeft; + sqlite3ExprIfFalse(pParse, &sEq, addrCont, SQLITE_JUMPIFNULL); + } + /* For a LEFT OUTER JOIN, generate code that will record the fact that ** at least one row of the right table has matched the left table. 
*/ @@ -107817,7 +109278,7 @@ static Bitmask codeOneLoopStart( testcase( pTerm->wtFlags & TERM_VIRTUAL ); /* IMP: R-30575-11662 */ testcase( pTerm->wtFlags & TERM_CODED ); if( pTerm->wtFlags & (TERM_VIRTUAL|TERM_CODED) ) continue; - if( (pTerm->prereqAll & notReady)!=0 ){ + if( (pTerm->prereqAll & newNotReady)!=0 ){ assert( pWInfo->untestedTerms ); continue; } @@ -107828,7 +109289,7 @@ static Bitmask codeOneLoopStart( } sqlite3ReleaseTempReg(pParse, iReleaseReg); - return notReady; + return newNotReady; } #if defined(SQLITE_TEST) @@ -111146,7 +112607,9 @@ static void yy_reduce( struct SrcList_item *pOld = yymsp[-4].minor.yy347->a; pNew->zName = pOld->zName; pNew->zDatabase = pOld->zDatabase; + pNew->pSelect = pOld->pSelect; pOld->zName = pOld->zDatabase = 0; + pOld->pSelect = 0; } sqlite3SrcListDelete(pParse->db, yymsp[-4].minor.yy347); }else{ @@ -113814,6 +115277,19 @@ SQLITE_API int sqlite3_config(int op, ...){ } #endif + case SQLITE_CONFIG_MMAP_SIZE: { + sqlite3_int64 szMmap = va_arg(ap, sqlite3_int64); + sqlite3_int64 mxMmap = va_arg(ap, sqlite3_int64); + if( mxMmap<0 || mxMmap>SQLITE_MAX_MMAP_SIZE ){ + mxMmap = SQLITE_MAX_MMAP_SIZE; + } + sqlite3GlobalConfig.mxMmap = mxMmap; + if( szMmap<0 ) szMmap = SQLITE_DEFAULT_MMAP_SIZE; + if( szMmap>mxMmap) szMmap = mxMmap; + sqlite3GlobalConfig.szMmap = szMmap; + break; + } + default: { rc = SQLITE_ERROR; break; @@ -114207,6 +115683,12 @@ SQLITE_PRIVATE void sqlite3LeaveMutexAndCloseZombie(sqlite3 *db){ ** go ahead and free all resources. */ + /* If a transaction is open, roll it back. This also ensures that if + ** any database schemas have been modified by an uncommitted transaction + ** they are reset. And that the required b-tree mutex is held to make + ** the pager rollback and schema reset an atomic operation. */ + sqlite3RollbackAll(db, SQLITE_OK); + /* Free any outstanding Savepoint structures. */ sqlite3CloseSavepoints(db); @@ -114307,6 +115789,15 @@ SQLITE_PRIVATE void sqlite3RollbackAll(sqlite3 *db, int tripCode){ int inTrans = 0; assert( sqlite3_mutex_held(db->mutex) ); sqlite3BeginBenignMalloc(); + + /* Obtain all b-tree mutexes before making any calls to BtreeRollback(). + ** This is important in case the transaction being rolled back has + ** modified the database schema. If the b-tree mutexes are not taken + ** here, then another shared-cache connection might sneak in between + ** the database rollback and schema reset, which can cause false + ** corruption reports in some cases. */ + sqlite3BtreeEnterAll(db); + for(i=0; inDb; i++){ Btree *p = db->aDb[i].pBt; if( p ){ @@ -114324,6 +115815,7 @@ SQLITE_PRIVATE void sqlite3RollbackAll(sqlite3 *db, int tripCode){ sqlite3ExpirePreparedStatements(db); sqlite3ResetAllSchemasOfConnection(db); } + sqlite3BtreeLeaveAll(db); /* Any deferred constraint violations have now been resolved. */ db->nDeferredCons = 0; @@ -114334,6 +115826,110 @@ SQLITE_PRIVATE void sqlite3RollbackAll(sqlite3 *db, int tripCode){ } } +/* +** Return a static string containing the name corresponding to the error code +** specified in the argument. 
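+**
+** For example, sqlite3ErrName(SQLITE_BUSY) returns "SQLITE_BUSY" and an
+** extended code such as SQLITE_IOERR_NOMEM returns "SQLITE_IOERR_NOMEM".
+** The lookup is attempted first on the full (possibly extended) code and
+** then on its primary code; if neither is recognized, a static
+** "SQLITE_UNKNOWN(nnn)" string is returned instead.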
+*/ +#if defined(SQLITE_DEBUG) || defined(SQLITE_TEST) || \ + defined(SQLITE_DEBUG_OS_TRACE) +SQLITE_PRIVATE const char *sqlite3ErrName(int rc){ + const char *zName = 0; + int i, origRc = rc; + for(i=0; i<2 && zName==0; i++, rc &= 0xff){ + switch( rc ){ + case SQLITE_OK: zName = "SQLITE_OK"; break; + case SQLITE_ERROR: zName = "SQLITE_ERROR"; break; + case SQLITE_INTERNAL: zName = "SQLITE_INTERNAL"; break; + case SQLITE_PERM: zName = "SQLITE_PERM"; break; + case SQLITE_ABORT: zName = "SQLITE_ABORT"; break; + case SQLITE_ABORT_ROLLBACK: zName = "SQLITE_ABORT_ROLLBACK"; break; + case SQLITE_BUSY: zName = "SQLITE_BUSY"; break; + case SQLITE_BUSY_RECOVERY: zName = "SQLITE_BUSY_RECOVERY"; break; + case SQLITE_LOCKED: zName = "SQLITE_LOCKED"; break; + case SQLITE_LOCKED_SHAREDCACHE: zName = "SQLITE_LOCKED_SHAREDCACHE";break; + case SQLITE_NOMEM: zName = "SQLITE_NOMEM"; break; + case SQLITE_READONLY: zName = "SQLITE_READONLY"; break; + case SQLITE_READONLY_RECOVERY: zName = "SQLITE_READONLY_RECOVERY"; break; + case SQLITE_READONLY_CANTLOCK: zName = "SQLITE_READONLY_CANTLOCK"; break; + case SQLITE_READONLY_ROLLBACK: zName = "SQLITE_READONLY_ROLLBACK"; break; + case SQLITE_INTERRUPT: zName = "SQLITE_INTERRUPT"; break; + case SQLITE_IOERR: zName = "SQLITE_IOERR"; break; + case SQLITE_IOERR_READ: zName = "SQLITE_IOERR_READ"; break; + case SQLITE_IOERR_SHORT_READ: zName = "SQLITE_IOERR_SHORT_READ"; break; + case SQLITE_IOERR_WRITE: zName = "SQLITE_IOERR_WRITE"; break; + case SQLITE_IOERR_FSYNC: zName = "SQLITE_IOERR_FSYNC"; break; + case SQLITE_IOERR_DIR_FSYNC: zName = "SQLITE_IOERR_DIR_FSYNC"; break; + case SQLITE_IOERR_TRUNCATE: zName = "SQLITE_IOERR_TRUNCATE"; break; + case SQLITE_IOERR_FSTAT: zName = "SQLITE_IOERR_FSTAT"; break; + case SQLITE_IOERR_UNLOCK: zName = "SQLITE_IOERR_UNLOCK"; break; + case SQLITE_IOERR_RDLOCK: zName = "SQLITE_IOERR_RDLOCK"; break; + case SQLITE_IOERR_DELETE: zName = "SQLITE_IOERR_DELETE"; break; + case SQLITE_IOERR_BLOCKED: zName = "SQLITE_IOERR_BLOCKED"; break; + case SQLITE_IOERR_NOMEM: zName = "SQLITE_IOERR_NOMEM"; break; + case SQLITE_IOERR_ACCESS: zName = "SQLITE_IOERR_ACCESS"; break; + case SQLITE_IOERR_CHECKRESERVEDLOCK: + zName = "SQLITE_IOERR_CHECKRESERVEDLOCK"; break; + case SQLITE_IOERR_LOCK: zName = "SQLITE_IOERR_LOCK"; break; + case SQLITE_IOERR_CLOSE: zName = "SQLITE_IOERR_CLOSE"; break; + case SQLITE_IOERR_DIR_CLOSE: zName = "SQLITE_IOERR_DIR_CLOSE"; break; + case SQLITE_IOERR_SHMOPEN: zName = "SQLITE_IOERR_SHMOPEN"; break; + case SQLITE_IOERR_SHMSIZE: zName = "SQLITE_IOERR_SHMSIZE"; break; + case SQLITE_IOERR_SHMLOCK: zName = "SQLITE_IOERR_SHMLOCK"; break; + case SQLITE_IOERR_SHMMAP: zName = "SQLITE_IOERR_SHMMAP"; break; + case SQLITE_IOERR_SEEK: zName = "SQLITE_IOERR_SEEK"; break; + case SQLITE_IOERR_DELETE_NOENT: zName = "SQLITE_IOERR_DELETE_NOENT";break; + case SQLITE_IOERR_MMAP: zName = "SQLITE_IOERR_MMAP"; break; + case SQLITE_CORRUPT: zName = "SQLITE_CORRUPT"; break; + case SQLITE_CORRUPT_VTAB: zName = "SQLITE_CORRUPT_VTAB"; break; + case SQLITE_NOTFOUND: zName = "SQLITE_NOTFOUND"; break; + case SQLITE_FULL: zName = "SQLITE_FULL"; break; + case SQLITE_CANTOPEN: zName = "SQLITE_CANTOPEN"; break; + case SQLITE_CANTOPEN_NOTEMPDIR: zName = "SQLITE_CANTOPEN_NOTEMPDIR";break; + case SQLITE_CANTOPEN_ISDIR: zName = "SQLITE_CANTOPEN_ISDIR"; break; + case SQLITE_CANTOPEN_FULLPATH: zName = "SQLITE_CANTOPEN_FULLPATH"; break; + case SQLITE_PROTOCOL: zName = "SQLITE_PROTOCOL"; break; + case SQLITE_EMPTY: zName = "SQLITE_EMPTY"; break; + case SQLITE_SCHEMA: zName 
= "SQLITE_SCHEMA"; break; + case SQLITE_TOOBIG: zName = "SQLITE_TOOBIG"; break; + case SQLITE_CONSTRAINT: zName = "SQLITE_CONSTRAINT"; break; + case SQLITE_CONSTRAINT_UNIQUE: zName = "SQLITE_CONSTRAINT_UNIQUE"; break; + case SQLITE_CONSTRAINT_TRIGGER: zName = "SQLITE_CONSTRAINT_TRIGGER";break; + case SQLITE_CONSTRAINT_FOREIGNKEY: + zName = "SQLITE_CONSTRAINT_FOREIGNKEY"; break; + case SQLITE_CONSTRAINT_CHECK: zName = "SQLITE_CONSTRAINT_CHECK"; break; + case SQLITE_CONSTRAINT_PRIMARYKEY: + zName = "SQLITE_CONSTRAINT_PRIMARYKEY"; break; + case SQLITE_CONSTRAINT_NOTNULL: zName = "SQLITE_CONSTRAINT_NOTNULL";break; + case SQLITE_CONSTRAINT_COMMITHOOK: + zName = "SQLITE_CONSTRAINT_COMMITHOOK"; break; + case SQLITE_CONSTRAINT_VTAB: zName = "SQLITE_CONSTRAINT_VTAB"; break; + case SQLITE_CONSTRAINT_FUNCTION: + zName = "SQLITE_CONSTRAINT_FUNCTION"; break; + case SQLITE_MISMATCH: zName = "SQLITE_MISMATCH"; break; + case SQLITE_MISUSE: zName = "SQLITE_MISUSE"; break; + case SQLITE_NOLFS: zName = "SQLITE_NOLFS"; break; + case SQLITE_AUTH: zName = "SQLITE_AUTH"; break; + case SQLITE_FORMAT: zName = "SQLITE_FORMAT"; break; + case SQLITE_RANGE: zName = "SQLITE_RANGE"; break; + case SQLITE_NOTADB: zName = "SQLITE_NOTADB"; break; + case SQLITE_ROW: zName = "SQLITE_ROW"; break; + case SQLITE_NOTICE: zName = "SQLITE_NOTICE"; break; + case SQLITE_NOTICE_RECOVER_WAL: zName = "SQLITE_NOTICE_RECOVER_WAL";break; + case SQLITE_NOTICE_RECOVER_ROLLBACK: + zName = "SQLITE_NOTICE_RECOVER_ROLLBACK"; break; + case SQLITE_WARNING: zName = "SQLITE_WARNING"; break; + case SQLITE_DONE: zName = "SQLITE_DONE"; break; + } + } + if( zName==0 ){ + static char zBuf[50]; + sqlite3_snprintf(sizeof(zBuf), zBuf, "SQLITE_UNKNOWN(%d)", origRc); + zName = zBuf; + } + return zName; +} +#endif + /* ** Return a static string that describes the kind of error specified in the ** argument. 
@@ -115634,6 +117230,7 @@ static int openDatabase( memcpy(db->aLimit, aHardLimit, sizeof(db->aLimit)); db->autoCommit = 1; db->nextAutovac = -1; + db->szMmap = sqlite3GlobalConfig.szMmap; db->nextPagesize = 0; db->flags |= SQLITE_ShortColNames | SQLITE_AutoIndex | SQLITE_EnableTrigger #if SQLITE_DEFAULT_FILE_FORMAT<4 @@ -117950,7 +119547,7 @@ SQLITE_PRIVATE void sqlite3Fts3Matchinfo(sqlite3_context *, Fts3Cursor *, const /* fts3_expr.c */ SQLITE_PRIVATE int sqlite3Fts3ExprParse(sqlite3_tokenizer *, int, - char **, int, int, int, const char *, int, Fts3Expr ** + char **, int, int, int, const char *, int, Fts3Expr **, char ** ); SQLITE_PRIVATE void sqlite3Fts3ExprFree(Fts3Expr *); #ifdef SQLITE_TEST @@ -117975,6 +119572,9 @@ SQLITE_PRIVATE int sqlite3Fts3EvalPhrasePoslist(Fts3Cursor *, Fts3Expr *, int iC SQLITE_PRIVATE int sqlite3Fts3MsrOvfl(Fts3Cursor *, Fts3MultiSegReader *, int *); SQLITE_PRIVATE int sqlite3Fts3MsrIncrRestart(Fts3MultiSegReader *pCsr); +/* fts3_tokenize_vtab.c */ +SQLITE_PRIVATE int sqlite3Fts3InitTok(sqlite3*, Fts3Hash *); + /* fts3_unicode2.c (functions generated by parsing unicode text files) */ #ifdef SQLITE_ENABLE_FTS4_UNICODE61 SQLITE_PRIVATE int sqlite3FtsUnicodeFold(int, int); @@ -120671,14 +122271,12 @@ static int fts3FilterMethod( pCsr->iLangid = 0; if( nVal==2 ) pCsr->iLangid = sqlite3_value_int(apVal[1]); + assert( p->base.zErrMsg==0 ); rc = sqlite3Fts3ExprParse(p->pTokenizer, pCsr->iLangid, - p->azColumn, p->bFts4, p->nColumn, iCol, zQuery, -1, &pCsr->pExpr + p->azColumn, p->bFts4, p->nColumn, iCol, zQuery, -1, &pCsr->pExpr, + &p->base.zErrMsg ); if( rc!=SQLITE_OK ){ - if( rc==SQLITE_ERROR ){ - static const char *zErr = "malformed MATCH expression: [%s]"; - p->base.zErrMsg = sqlite3_mprintf(zErr, zQuery); - } return rc; } @@ -121342,9 +122940,13 @@ SQLITE_PRIVATE int sqlite3Fts3Init(sqlite3 *db){ db, "fts4", &fts3Module, (void *)pHash, 0 ); } + if( rc==SQLITE_OK ){ + rc = sqlite3Fts3InitTok(db, (void *)pHash); + } return rc; } + /* An error has occurred. Delete the hash table and return the error code. */ assert( rc!=SQLITE_OK ); if( pHash ){ @@ -123118,17 +124720,26 @@ static int fts3auxConnectMethod( UNUSED_PARAMETER(pUnused); - /* The user should specify a single argument - the name of an fts3 table. 
*/ - if( argc!=4 ){ - *pzErr = sqlite3_mprintf( - "wrong number of arguments to fts4aux constructor" - ); - return SQLITE_ERROR; - } + /* The user should invoke this in one of two forms: + ** + ** CREATE VIRTUAL TABLE xxx USING fts4aux(fts4-table); + ** CREATE VIRTUAL TABLE xxx USING fts4aux(fts4-table-db, fts4-table); + */ + if( argc!=4 && argc!=5 ) goto bad_args; zDb = argv[1]; nDb = (int)strlen(zDb); - zFts3 = argv[3]; + if( argc==5 ){ + if( nDb==4 && 0==sqlite3_strnicmp("temp", zDb, 4) ){ + zDb = argv[3]; + nDb = (int)strlen(zDb); + zFts3 = argv[4]; + }else{ + goto bad_args; + } + }else{ + zFts3 = argv[3]; + } nFts3 = (int)strlen(zFts3); rc = sqlite3_declare_vtab(db, FTS3_TERMS_SCHEMA); @@ -123151,6 +124762,10 @@ static int fts3auxConnectMethod( *ppVtab = (sqlite3_vtab *)p; return SQLITE_OK; + + bad_args: + *pzErr = sqlite3_mprintf("invalid arguments to fts4aux constructor"); + return SQLITE_ERROR; } /* @@ -124164,8 +125779,10 @@ static int fts3ExprParse( } pNot->eType = FTSQUERY_NOT; pNot->pRight = p; + p->pParent = pNot; if( pNotBranch ){ pNot->pLeft = pNotBranch; + pNotBranch->pParent = pNot; } pNotBranch = pNot; p = pPrev; @@ -124253,6 +125870,7 @@ static int fts3ExprParse( pIter = pIter->pLeft; } pIter->pLeft = pRet; + pRet->pParent = pIter; pRet = pNotBranch; } } @@ -124269,6 +125887,223 @@ exprparse_out: return rc; } +/* +** Return SQLITE_ERROR if the maximum depth of the expression tree passed +** as the only argument is more than nMaxDepth. +*/ +static int fts3ExprCheckDepth(Fts3Expr *p, int nMaxDepth){ + int rc = SQLITE_OK; + if( p ){ + if( nMaxDepth<0 ){ + rc = SQLITE_TOOBIG; + }else{ + rc = fts3ExprCheckDepth(p->pLeft, nMaxDepth-1); + if( rc==SQLITE_OK ){ + rc = fts3ExprCheckDepth(p->pRight, nMaxDepth-1); + } + } + } + return rc; +} + +/* +** This function attempts to transform the expression tree at (*pp) to +** an equivalent but more balanced form. The tree is modified in place. +** If successful, SQLITE_OK is returned and (*pp) set to point to the +** new root expression node. +** +** nMaxDepth is the maximum allowable depth of the balanced sub-tree. +** +** Otherwise, if an error occurs, an SQLite error code is returned and +** expression (*pp) freed. +*/ +static int fts3ExprBalance(Fts3Expr **pp, int nMaxDepth){ + int rc = SQLITE_OK; /* Return code */ + Fts3Expr *pRoot = *pp; /* Initial root node */ + Fts3Expr *pFree = 0; /* List of free nodes. Linked by pParent. */ + int eType = pRoot->eType; /* Type of node in this tree */ + + if( nMaxDepth==0 ){ + rc = SQLITE_ERROR; + } + + if( rc==SQLITE_OK && (eType==FTSQUERY_AND || eType==FTSQUERY_OR) ){ + Fts3Expr **apLeaf; + apLeaf = (Fts3Expr **)sqlite3_malloc(sizeof(Fts3Expr *) * nMaxDepth); + if( 0==apLeaf ){ + rc = SQLITE_NOMEM; + }else{ + memset(apLeaf, 0, sizeof(Fts3Expr *) * nMaxDepth); + } + + if( rc==SQLITE_OK ){ + int i; + Fts3Expr *p; + + /* Set $p to point to the left-most leaf in the tree of eType nodes. */ + for(p=pRoot; p->eType==eType; p=p->pLeft){ + assert( p->pParent==0 || p->pParent->pLeft==p ); + assert( p->pLeft && p->pRight ); + } + + /* This loop runs once for each leaf in the tree of eType nodes. 
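+      ** Leaves are collected into apLeaf[], which behaves like a binary
+      ** counter: two sub-trees of equal height are merged as soon as the
+      ** second one appears, so a left-deep chain of N AND or OR operands is
+      ** rebuilt with a depth of roughly log2(N) instead of N.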
*/ + while( 1 ){ + int iLvl; + Fts3Expr *pParent = p->pParent; /* Current parent of p */ + + assert( pParent==0 || pParent->pLeft==p ); + p->pParent = 0; + if( pParent ){ + pParent->pLeft = 0; + }else{ + pRoot = 0; + } + rc = fts3ExprBalance(&p, nMaxDepth-1); + if( rc!=SQLITE_OK ) break; + + for(iLvl=0; p && iLvlpLeft = apLeaf[iLvl]; + pFree->pRight = p; + pFree->pLeft->pParent = pFree; + pFree->pRight->pParent = pFree; + + p = pFree; + pFree = pFree->pParent; + p->pParent = 0; + apLeaf[iLvl] = 0; + } + } + if( p ){ + sqlite3Fts3ExprFree(p); + rc = SQLITE_TOOBIG; + break; + } + + /* If that was the last leaf node, break out of the loop */ + if( pParent==0 ) break; + + /* Set $p to point to the next leaf in the tree of eType nodes */ + for(p=pParent->pRight; p->eType==eType; p=p->pLeft); + + /* Remove pParent from the original tree. */ + assert( pParent->pParent==0 || pParent->pParent->pLeft==pParent ); + pParent->pRight->pParent = pParent->pParent; + if( pParent->pParent ){ + pParent->pParent->pLeft = pParent->pRight; + }else{ + assert( pParent==pRoot ); + pRoot = pParent->pRight; + } + + /* Link pParent into the free node list. It will be used as an + ** internal node of the new tree. */ + pParent->pParent = pFree; + pFree = pParent; + } + + if( rc==SQLITE_OK ){ + p = 0; + for(i=0; ipParent = 0; + }else{ + assert( pFree!=0 ); + pFree->pRight = p; + pFree->pLeft = apLeaf[i]; + pFree->pLeft->pParent = pFree; + pFree->pRight->pParent = pFree; + + p = pFree; + pFree = pFree->pParent; + p->pParent = 0; + } + } + } + pRoot = p; + }else{ + /* An error occurred. Delete the contents of the apLeaf[] array + ** and pFree list. Everything else is cleaned up by the call to + ** sqlite3Fts3ExprFree(pRoot) below. */ + Fts3Expr *pDel; + for(i=0; ipParent; + sqlite3_free(pDel); + } + } + + assert( pFree==0 ); + sqlite3_free( apLeaf ); + } + } + + if( rc!=SQLITE_OK ){ + sqlite3Fts3ExprFree(pRoot); + pRoot = 0; + } + *pp = pRoot; + return rc; +} + +/* +** This function is similar to sqlite3Fts3ExprParse(), with the following +** differences: +** +** 1. It does not do expression rebalancing. +** 2. It does not check that the expression does not exceed the +** maximum allowable depth. +** 3. Even if it fails, *ppExpr may still be set to point to an +** expression tree. It should be deleted using sqlite3Fts3ExprFree() +** in this case. +*/ +static int fts3ExprParseUnbalanced( + sqlite3_tokenizer *pTokenizer, /* Tokenizer module */ + int iLangid, /* Language id for tokenizer */ + char **azCol, /* Array of column names for fts3 table */ + int bFts4, /* True to allow FTS4-only syntax */ + int nCol, /* Number of entries in azCol[] */ + int iDefaultCol, /* Default column to query */ + const char *z, int n, /* Text of MATCH query */ + Fts3Expr **ppExpr /* OUT: Parsed query structure */ +){ + int nParsed; + int rc; + ParseContext sParse; + + memset(&sParse, 0, sizeof(ParseContext)); + sParse.pTokenizer = pTokenizer; + sParse.iLangid = iLangid; + sParse.azCol = (const char **)azCol; + sParse.nCol = nCol; + sParse.iDefaultCol = iDefaultCol; + sParse.bFts4 = bFts4; + if( z==0 ){ + *ppExpr = 0; + return SQLITE_OK; + } + if( n<0 ){ + n = (int)strlen(z); + } + rc = fts3ExprParse(&sParse, z, n, ppExpr, &nParsed); + assert( rc==SQLITE_OK || *ppExpr==0 ); + + /* Check for mismatched parenthesis */ + if( rc==SQLITE_OK && sParse.nNest ){ + rc = SQLITE_ERROR; + } + + return rc; +} + /* ** Parameters z and n contain a pointer to and length of a buffer containing ** an fts3 query expression, respectively. 
This function attempts to parse the @@ -124301,49 +126136,74 @@ SQLITE_PRIVATE int sqlite3Fts3ExprParse( int nCol, /* Number of entries in azCol[] */ int iDefaultCol, /* Default column to query */ const char *z, int n, /* Text of MATCH query */ - Fts3Expr **ppExpr /* OUT: Parsed query structure */ + Fts3Expr **ppExpr, /* OUT: Parsed query structure */ + char **pzErr /* OUT: Error message (sqlite3_malloc) */ ){ - int nParsed; - int rc; - ParseContext sParse; - - memset(&sParse, 0, sizeof(ParseContext)); - sParse.pTokenizer = pTokenizer; - sParse.iLangid = iLangid; - sParse.azCol = (const char **)azCol; - sParse.nCol = nCol; - sParse.iDefaultCol = iDefaultCol; - sParse.bFts4 = bFts4; - if( z==0 ){ - *ppExpr = 0; - return SQLITE_OK; + static const int MAX_EXPR_DEPTH = 12; + int rc = fts3ExprParseUnbalanced( + pTokenizer, iLangid, azCol, bFts4, nCol, iDefaultCol, z, n, ppExpr + ); + + /* Rebalance the expression. And check that its depth does not exceed + ** MAX_EXPR_DEPTH. */ + if( rc==SQLITE_OK && *ppExpr ){ + rc = fts3ExprBalance(ppExpr, MAX_EXPR_DEPTH); + if( rc==SQLITE_OK ){ + rc = fts3ExprCheckDepth(*ppExpr, MAX_EXPR_DEPTH); + } } - if( n<0 ){ - n = (int)strlen(z); - } - rc = fts3ExprParse(&sParse, z, n, ppExpr, &nParsed); - /* Check for mismatched parenthesis */ - if( rc==SQLITE_OK && sParse.nNest ){ - rc = SQLITE_ERROR; + if( rc!=SQLITE_OK ){ sqlite3Fts3ExprFree(*ppExpr); *ppExpr = 0; + if( rc==SQLITE_TOOBIG ){ + *pzErr = sqlite3_mprintf( + "FTS expression tree is too large (maximum depth %d)", MAX_EXPR_DEPTH + ); + rc = SQLITE_ERROR; + }else if( rc==SQLITE_ERROR ){ + *pzErr = sqlite3_mprintf("malformed MATCH expression: [%s]", z); + } } return rc; } /* -** Free a parsed fts3 query expression allocated by sqlite3Fts3ExprParse(). +** Free a single node of an expression tree. */ -SQLITE_PRIVATE void sqlite3Fts3ExprFree(Fts3Expr *p){ - if( p ){ - assert( p->eType==FTSQUERY_PHRASE || p->pPhrase==0 ); - sqlite3Fts3ExprFree(p->pLeft); - sqlite3Fts3ExprFree(p->pRight); - sqlite3Fts3EvalPhraseCleanup(p->pPhrase); - sqlite3_free(p->aMI); - sqlite3_free(p); +static void fts3FreeExprNode(Fts3Expr *p){ + assert( p->eType==FTSQUERY_PHRASE || p->pPhrase==0 ); + sqlite3Fts3EvalPhraseCleanup(p->pPhrase); + sqlite3_free(p->aMI); + sqlite3_free(p); +} + +/* +** Free a parsed fts3 query expression allocated by sqlite3Fts3ExprParse(). +** +** This function would be simpler if it recursively called itself. But +** that would mean passing a sufficiently large expression to ExprParse() +** could cause a stack overflow. +*/ +SQLITE_PRIVATE void sqlite3Fts3ExprFree(Fts3Expr *pDel){ + Fts3Expr *p; + assert( pDel==0 || pDel->pParent==0 ); + for(p=pDel; p && (p->pLeft||p->pRight); p=(p->pLeft ? p->pLeft : p->pRight)){ + assert( p->pParent==0 || p==p->pParent->pRight || p==p->pParent->pLeft ); + } + while( p ){ + Fts3Expr *pParent = p->pParent; + fts3FreeExprNode(p); + if( pParent && p==pParent->pLeft && pParent->pRight ){ + p = pParent->pRight; + while( p && (p->pLeft || p->pRight) ){ + assert( p==p->pParent->pRight || p==p->pParent->pLeft ); + p = (p->pLeft ? p->pLeft : p->pRight); + } + }else{ + p = pParent; + } } } @@ -124395,6 +126255,9 @@ static int queryTestTokenizer( ** the returned expression text and then freed using sqlite3_free(). 
*/ static char *exprToString(Fts3Expr *pExpr, char *zBuf){ + if( pExpr==0 ){ + return sqlite3_mprintf(""); + } switch( pExpr->eType ){ case FTSQUERY_PHRASE: { Fts3Phrase *pPhrase = pExpr->pPhrase; @@ -124502,10 +126365,21 @@ static void fts3ExprTest( azCol[ii] = (char *)sqlite3_value_text(argv[ii+2]); } - rc = sqlite3Fts3ExprParse( - pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr - ); + if( sqlite3_user_data(context) ){ + char *zDummy = 0; + rc = sqlite3Fts3ExprParse( + pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr, &zDummy + ); + assert( rc==SQLITE_OK || pExpr==0 ); + sqlite3_free(zDummy); + }else{ + rc = fts3ExprParseUnbalanced( + pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr + ); + } + if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM ){ + sqlite3Fts3ExprFree(pExpr); sqlite3_result_error(context, "Error parsing expression", -1); }else if( rc==SQLITE_NOMEM || !(zBuf = exprToString(pExpr, 0)) ){ sqlite3_result_error_nomem(context); @@ -124528,9 +126402,15 @@ exprtest_out: ** with database connection db. */ SQLITE_PRIVATE int sqlite3Fts3ExprInitTestInterface(sqlite3* db){ - return sqlite3_create_function( + int rc = sqlite3_create_function( db, "fts3_exprtest", -1, SQLITE_UTF8, 0, fts3ExprTest, 0, 0 ); + if( rc==SQLITE_OK ){ + rc = sqlite3_create_function(db, "fts3_exprtest_rebalance", + -1, SQLITE_UTF8, (void *)1, fts3ExprTest, 0, 0 + ); + } + return rc; } #endif @@ -126293,6 +128173,462 @@ SQLITE_PRIVATE void sqlite3Fts3SimpleTokenizerModule( #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ /************** End of fts3_tokenizer1.c *************************************/ +/************** Begin file fts3_tokenize_vtab.c ******************************/ +/* +** 2013 Apr 22 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This file contains code for the "fts3tokenize" virtual table module. +** An fts3tokenize virtual table is created as follows: +** +** CREATE VIRTUAL TABLE USING fts3tokenize( +** , , ... +** ); +** +** The table created has the following schema: +** +** CREATE TABLE (input, token, start, end, position) +** +** When queried, the query must include a WHERE clause of type: +** +** input = +** +** The virtual table module tokenizes this , using the FTS3 +** tokenizer specified by the arguments to the CREATE VIRTUAL TABLE +** statement and returns one row for each token in the result. With +** fields set as follows: +** +** input: Always set to a copy of +** token: A token from the input. +** start: Byte offset of the token within the input . +** end: Byte offset of the byte immediately following the end of the +** token within the input string. +** pos: Token offset of token within input. +** +*/ +#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) + +/* #include */ +/* #include */ + +typedef struct Fts3tokTable Fts3tokTable; +typedef struct Fts3tokCursor Fts3tokCursor; + +/* +** Virtual table structure. +*/ +struct Fts3tokTable { + sqlite3_vtab base; /* Base class used by SQLite core */ + const sqlite3_tokenizer_module *pMod; + sqlite3_tokenizer *pTok; +}; + +/* +** Virtual table cursor structure. 
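+**
+** One cursor is opened for each scan of an fts3tokenize table and steps
+** through the tokenizer output, one token per returned row. For example,
+** given a hypothetical table "t1" created with this module, a query like
+**
+**     SELECT token, start, end, position FROM t1 WHERE input = 'the quick fox';
+**
+** returns one row for each token of the input string.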
+*/ +struct Fts3tokCursor { + sqlite3_vtab_cursor base; /* Base class used by SQLite core */ + char *zInput; /* Input string */ + sqlite3_tokenizer_cursor *pCsr; /* Cursor to iterate through zInput */ + int iRowid; /* Current 'rowid' value */ + const char *zToken; /* Current 'token' value */ + int nToken; /* Size of zToken in bytes */ + int iStart; /* Current 'start' value */ + int iEnd; /* Current 'end' value */ + int iPos; /* Current 'pos' value */ +}; + +/* +** Query FTS for the tokenizer implementation named zName. +*/ +static int fts3tokQueryTokenizer( + Fts3Hash *pHash, + const char *zName, + const sqlite3_tokenizer_module **pp, + char **pzErr +){ + sqlite3_tokenizer_module *p; + int nName = (int)strlen(zName); + + p = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash, zName, nName+1); + if( !p ){ + *pzErr = sqlite3_mprintf("unknown tokenizer: %s", zName); + return SQLITE_ERROR; + } + + *pp = p; + return SQLITE_OK; +} + +/* +** The second argument, argv[], is an array of pointers to nul-terminated +** strings. This function makes a copy of the array and strings into a +** single block of memory. It then dequotes any of the strings that appear +** to be quoted. +** +** If successful, output parameter *pazDequote is set to point at the +** array of dequoted strings and SQLITE_OK is returned. The caller is +** responsible for eventually calling sqlite3_free() to free the array +** in this case. Or, if an error occurs, an SQLite error code is returned. +** The final value of *pazDequote is undefined in this case. +*/ +static int fts3tokDequoteArray( + int argc, /* Number of elements in argv[] */ + const char * const *argv, /* Input array */ + char ***pazDequote /* Output array */ +){ + int rc = SQLITE_OK; /* Return code */ + if( argc==0 ){ + *pazDequote = 0; + }else{ + int i; + int nByte = 0; + char **azDequote; + + for(i=0; ixCreate((nDequote>1 ? nDequote-1 : 0), azArg, &pTok); + } + + if( rc==SQLITE_OK ){ + pTab = (Fts3tokTable *)sqlite3_malloc(sizeof(Fts3tokTable)); + if( pTab==0 ){ + rc = SQLITE_NOMEM; + } + } + + if( rc==SQLITE_OK ){ + memset(pTab, 0, sizeof(Fts3tokTable)); + pTab->pMod = pMod; + pTab->pTok = pTok; + *ppVtab = &pTab->base; + }else{ + if( pTok ){ + pMod->xDestroy(pTok); + } + } + + sqlite3_free(azDequote); + return rc; +} + +/* +** This function does the work for both the xDisconnect and xDestroy methods. +** These tables have no persistent representation of their own, so xDisconnect +** and xDestroy are identical operations. +*/ +static int fts3tokDisconnectMethod(sqlite3_vtab *pVtab){ + Fts3tokTable *pTab = (Fts3tokTable *)pVtab; + + pTab->pMod->xDestroy(pTab->pTok); + sqlite3_free(pTab); + return SQLITE_OK; +} + +/* +** xBestIndex - Analyze a WHERE and ORDER BY clause. +*/ +static int fts3tokBestIndexMethod( + sqlite3_vtab *pVTab, + sqlite3_index_info *pInfo +){ + int i; + UNUSED_PARAMETER(pVTab); + + for(i=0; inConstraint; i++){ + if( pInfo->aConstraint[i].usable + && pInfo->aConstraint[i].iColumn==0 + && pInfo->aConstraint[i].op==SQLITE_INDEX_CONSTRAINT_EQ + ){ + pInfo->idxNum = 1; + pInfo->aConstraintUsage[i].argvIndex = 1; + pInfo->aConstraintUsage[i].omit = 1; + pInfo->estimatedCost = 1; + return SQLITE_OK; + } + } + + pInfo->idxNum = 0; + assert( pInfo->estimatedCost>1000000.0 ); + + return SQLITE_OK; +} + +/* +** xOpen - Open a cursor. 
+*/ +static int fts3tokOpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){ + Fts3tokCursor *pCsr; + UNUSED_PARAMETER(pVTab); + + pCsr = (Fts3tokCursor *)sqlite3_malloc(sizeof(Fts3tokCursor)); + if( pCsr==0 ){ + return SQLITE_NOMEM; + } + memset(pCsr, 0, sizeof(Fts3tokCursor)); + + *ppCsr = (sqlite3_vtab_cursor *)pCsr; + return SQLITE_OK; +} + +/* +** Reset the tokenizer cursor passed as the only argument. As if it had +** just been returned by fts3tokOpenMethod(). +*/ +static void fts3tokResetCursor(Fts3tokCursor *pCsr){ + if( pCsr->pCsr ){ + Fts3tokTable *pTab = (Fts3tokTable *)(pCsr->base.pVtab); + pTab->pMod->xClose(pCsr->pCsr); + pCsr->pCsr = 0; + } + sqlite3_free(pCsr->zInput); + pCsr->zInput = 0; + pCsr->zToken = 0; + pCsr->nToken = 0; + pCsr->iStart = 0; + pCsr->iEnd = 0; + pCsr->iPos = 0; + pCsr->iRowid = 0; +} + +/* +** xClose - Close a cursor. +*/ +static int fts3tokCloseMethod(sqlite3_vtab_cursor *pCursor){ + Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; + + fts3tokResetCursor(pCsr); + sqlite3_free(pCsr); + return SQLITE_OK; +} + +/* +** xNext - Advance the cursor to the next row, if any. +*/ +static int fts3tokNextMethod(sqlite3_vtab_cursor *pCursor){ + Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; + Fts3tokTable *pTab = (Fts3tokTable *)(pCursor->pVtab); + int rc; /* Return code */ + + pCsr->iRowid++; + rc = pTab->pMod->xNext(pCsr->pCsr, + &pCsr->zToken, &pCsr->nToken, + &pCsr->iStart, &pCsr->iEnd, &pCsr->iPos + ); + + if( rc!=SQLITE_OK ){ + fts3tokResetCursor(pCsr); + if( rc==SQLITE_DONE ) rc = SQLITE_OK; + } + + return rc; +} + +/* +** xFilter - Initialize a cursor to point at the start of its data. +*/ +static int fts3tokFilterMethod( + sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */ + int idxNum, /* Strategy index */ + const char *idxStr, /* Unused */ + int nVal, /* Number of elements in apVal */ + sqlite3_value **apVal /* Arguments for the indexing scheme */ +){ + int rc = SQLITE_ERROR; + Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; + Fts3tokTable *pTab = (Fts3tokTable *)(pCursor->pVtab); + UNUSED_PARAMETER(idxStr); + UNUSED_PARAMETER(nVal); + + fts3tokResetCursor(pCsr); + if( idxNum==1 ){ + const char *zByte = (const char *)sqlite3_value_text(apVal[0]); + int nByte = sqlite3_value_bytes(apVal[0]); + pCsr->zInput = sqlite3_malloc(nByte+1); + if( pCsr->zInput==0 ){ + rc = SQLITE_NOMEM; + }else{ + memcpy(pCsr->zInput, zByte, nByte); + pCsr->zInput[nByte] = 0; + rc = pTab->pMod->xOpen(pTab->pTok, pCsr->zInput, nByte, &pCsr->pCsr); + if( rc==SQLITE_OK ){ + pCsr->pCsr->pTokenizer = pTab->pTok; + } + } + } + + if( rc!=SQLITE_OK ) return rc; + return fts3tokNextMethod(pCursor); +} + +/* +** xEof - Return true if the cursor is at EOF, or false otherwise. +*/ +static int fts3tokEofMethod(sqlite3_vtab_cursor *pCursor){ + Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; + return (pCsr->zToken==0); +} + +/* +** xColumn - Return a column value. 
+*/ +static int fts3tokColumnMethod( + sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */ + sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */ + int iCol /* Index of column to read value from */ +){ + Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; + + /* CREATE TABLE x(input, token, start, end, position) */ + switch( iCol ){ + case 0: + sqlite3_result_text(pCtx, pCsr->zInput, -1, SQLITE_TRANSIENT); + break; + case 1: + sqlite3_result_text(pCtx, pCsr->zToken, pCsr->nToken, SQLITE_TRANSIENT); + break; + case 2: + sqlite3_result_int(pCtx, pCsr->iStart); + break; + case 3: + sqlite3_result_int(pCtx, pCsr->iEnd); + break; + default: + assert( iCol==4 ); + sqlite3_result_int(pCtx, pCsr->iPos); + break; + } + return SQLITE_OK; +} + +/* +** xRowid - Return the current rowid for the cursor. +*/ +static int fts3tokRowidMethod( + sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */ + sqlite_int64 *pRowid /* OUT: Rowid value */ +){ + Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; + *pRowid = (sqlite3_int64)pCsr->iRowid; + return SQLITE_OK; +} + +/* +** Register the fts3tok module with database connection db. Return SQLITE_OK +** if successful or an error code if sqlite3_create_module() fails. +*/ +SQLITE_PRIVATE int sqlite3Fts3InitTok(sqlite3 *db, Fts3Hash *pHash){ + static const sqlite3_module fts3tok_module = { + 0, /* iVersion */ + fts3tokConnectMethod, /* xCreate */ + fts3tokConnectMethod, /* xConnect */ + fts3tokBestIndexMethod, /* xBestIndex */ + fts3tokDisconnectMethod, /* xDisconnect */ + fts3tokDisconnectMethod, /* xDestroy */ + fts3tokOpenMethod, /* xOpen */ + fts3tokCloseMethod, /* xClose */ + fts3tokFilterMethod, /* xFilter */ + fts3tokNextMethod, /* xNext */ + fts3tokEofMethod, /* xEof */ + fts3tokColumnMethod, /* xColumn */ + fts3tokRowidMethod, /* xRowid */ + 0, /* xUpdate */ + 0, /* xBegin */ + 0, /* xSync */ + 0, /* xCommit */ + 0, /* xRollback */ + 0, /* xFindFunction */ + 0, /* xRename */ + 0, /* xSavepoint */ + 0, /* xRelease */ + 0 /* xRollbackTo */ + }; + int rc; /* Return code */ + + rc = sqlite3_create_module(db, "fts3tokenize", &fts3tok_module, (void*)pHash); + return rc; +} + +#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ + +/************** End of fts3_tokenize_vtab.c **********************************/ /************** Begin file fts3_write.c **************************************/ /* ** 2009 Oct 23 diff --git a/src/3rdparty/sqlite3.h b/src/3rdparty/sqlite3.h index 69b4586a3f..e398838287 100644 --- a/src/3rdparty/sqlite3.h +++ b/src/3rdparty/sqlite3.h @@ -107,9 +107,9 @@ extern "C" { ** [sqlite3_libversion_number()], [sqlite3_sourceid()], ** [sqlite_version()] and [sqlite_source_id()]. 
*/ -#define SQLITE_VERSION "3.7.16.2" -#define SQLITE_VERSION_NUMBER 3007016 -#define SQLITE_SOURCE_ID "2013-04-12 11:52:43 cbea02d93865ce0e06789db95fd9168ebac970c7" +#define SQLITE_VERSION "3.7.17" +#define SQLITE_VERSION_NUMBER 3007017 +#define SQLITE_SOURCE_ID "2013-05-20 00:56:22 118a3b35693b134d56ebd780123b7fd6f1497668" /* ** CAPI3REF: Run-Time Library Version Numbers @@ -425,6 +425,8 @@ SQLITE_API int sqlite3_exec( #define SQLITE_FORMAT 24 /* Auxiliary database format error */ #define SQLITE_RANGE 25 /* 2nd parameter to sqlite3_bind out of range */ #define SQLITE_NOTADB 26 /* File opened that is not a database file */ +#define SQLITE_NOTICE 27 /* Notifications from sqlite3_log() */ +#define SQLITE_WARNING 28 /* Warnings from sqlite3_log() */ #define SQLITE_ROW 100 /* sqlite3_step() has another row ready */ #define SQLITE_DONE 101 /* sqlite3_step() has finished executing */ /* end-of-error-codes */ @@ -475,6 +477,7 @@ SQLITE_API int sqlite3_exec( #define SQLITE_IOERR_SHMMAP (SQLITE_IOERR | (21<<8)) #define SQLITE_IOERR_SEEK (SQLITE_IOERR | (22<<8)) #define SQLITE_IOERR_DELETE_NOENT (SQLITE_IOERR | (23<<8)) +#define SQLITE_IOERR_MMAP (SQLITE_IOERR | (24<<8)) #define SQLITE_LOCKED_SHAREDCACHE (SQLITE_LOCKED | (1<<8)) #define SQLITE_BUSY_RECOVERY (SQLITE_BUSY | (1<<8)) #define SQLITE_CANTOPEN_NOTEMPDIR (SQLITE_CANTOPEN | (1<<8)) @@ -494,6 +497,8 @@ SQLITE_API int sqlite3_exec( #define SQLITE_CONSTRAINT_TRIGGER (SQLITE_CONSTRAINT | (7<<8)) #define SQLITE_CONSTRAINT_UNIQUE (SQLITE_CONSTRAINT | (8<<8)) #define SQLITE_CONSTRAINT_VTAB (SQLITE_CONSTRAINT | (9<<8)) +#define SQLITE_NOTICE_RECOVER_WAL (SQLITE_NOTICE | (1<<8)) +#define SQLITE_NOTICE_RECOVER_ROLLBACK (SQLITE_NOTICE | (2<<8)) /* ** CAPI3REF: Flags For File Open Operations @@ -733,6 +738,9 @@ struct sqlite3_io_methods { void (*xShmBarrier)(sqlite3_file*); int (*xShmUnmap)(sqlite3_file*, int deleteFlag); /* Methods above are valid for version 2 */ + int (*xFetch)(sqlite3_file*, sqlite3_int64 iOfst, int iAmt, void **pp); + int (*xUnfetch)(sqlite3_file*, sqlite3_int64 iOfst, void *p); + /* Methods above are valid for version 3 */ /* Additional methods may be added in future releases */ }; @@ -869,7 +877,8 @@ struct sqlite3_io_methods { ** it is able to override built-in [PRAGMA] statements. ** **
  • [[SQLITE_FCNTL_BUSYHANDLER]] -** ^This file-control may be invoked by SQLite on the database file handle +** ^The [SQLITE_FCNTL_BUSYHANDLER] +** file-control may be invoked by SQLite on the database file handle ** shortly after it is opened in order to provide a custom VFS with access ** to the connections busy-handler callback. The argument is of type (void **) ** - an array of two (void *) values. The first (void *) actually points @@ -880,13 +889,24 @@ struct sqlite3_io_methods { ** current operation. ** **
  • [[SQLITE_FCNTL_TEMPFILENAME]] -** ^Application can invoke this file-control to have SQLite generate a +** ^Application can invoke the [SQLITE_FCNTL_TEMPFILENAME] file-control +** to have SQLite generate a ** temporary filename using the same algorithm that is followed to generate ** temporary filenames for TEMP tables and other internal uses. The ** argument should be a char** which will be filled with the filename ** written into memory obtained from [sqlite3_malloc()]. The caller should ** invoke [sqlite3_free()] on the result to avoid a memory leak. ** +**
  • [[SQLITE_FCNTL_MMAP_SIZE]] +** The [SQLITE_FCNTL_MMAP_SIZE] file control is used to query or set the +** maximum number of bytes that will be used for memory-mapped I/O. +** The argument is a pointer to a value of type sqlite3_int64 that +** is an advisory maximum number of bytes in the file to memory map. The +** pointer is overwritten with the old value. The limit is not changed if +** the value originally pointed to is negative, and so the current limit +** can be queried by passing in a pointer to a negative number. This +** file-control is used internally to implement [PRAGMA mmap_size]. +** ** */ #define SQLITE_FCNTL_LOCKSTATE 1 @@ -905,6 +925,7 @@ struct sqlite3_io_methods { #define SQLITE_FCNTL_PRAGMA 14 #define SQLITE_FCNTL_BUSYHANDLER 15 #define SQLITE_FCNTL_TEMPFILENAME 16 +#define SQLITE_FCNTL_MMAP_SIZE 18 /* ** CAPI3REF: Mutex Handle @@ -1571,7 +1592,9 @@ struct sqlite3_mem_methods { ** page cache implementation into that object.)^
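**
** A minimal sketch of the [SQLITE_FCNTL_MMAP_SIZE] file control described
** above, assuming an open handle "db"; passing a negative value queries the
** current limit of database "main" without changing it:
**
**   sqlite3_int64 szMmap = -1;
**   sqlite3_file_control(db, "main", SQLITE_FCNTL_MMAP_SIZE, &szMmap);
**
** After the call, szMmap holds the connection's current mmap limit in bytes.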
  • ** ** [[SQLITE_CONFIG_LOG]]
    SQLITE_CONFIG_LOG
    -**
    ^The SQLITE_CONFIG_LOG option takes two arguments: a pointer to a +**
    The SQLITE_CONFIG_LOG option is used to configure the SQLite +** global [error log]. +** (^The SQLITE_CONFIG_LOG option takes two arguments: a pointer to a ** function with a call signature of void(*)(void*,int,const char*), ** and a pointer to void. ^If the function pointer is not NULL, it is ** invoked by [sqlite3_log()] to process each logging event. ^If the @@ -1617,12 +1640,12 @@ struct sqlite3_mem_methods { **
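**
** A sketch of the error-log callback shape that SQLITE_CONFIG_LOG expects,
** as described above; the callback name and fprintf() body are illustrative
** and <stdio.h> is assumed:
**
**   static void xErrorLog(void *pArg, int iErrCode, const char *zMsg){
**     fprintf(stderr, "sqlite3 (%d): %s\n", iErrCode, zMsg);
**   }
**   sqlite3_config(SQLITE_CONFIG_LOG, xErrorLog, (void*)0);
**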
    SQLITE_CONFIG_PCACHE and SQLITE_CONFIG_GETPCACHE **
    These options are obsolete and should not be used by new code. ** They are retained for backwards compatibility but are now no-ops. -** +**
    ** ** [[SQLITE_CONFIG_SQLLOG]] **
    SQLITE_CONFIG_SQLLOG **
    This option is only available if sqlite is compiled with the -** SQLITE_ENABLE_SQLLOG pre-processor macro defined. The first argument should +** [SQLITE_ENABLE_SQLLOG] pre-processor macro defined. The first argument should ** be a pointer to a function of type void(*)(void*,sqlite3*,const char*, int). ** The second should be of type (void*). The callback is invoked by the library ** in three separate circumstances, identified by the value passed as the @@ -1632,7 +1655,23 @@ struct sqlite3_mem_methods { ** fourth parameter is 1, then the SQL statement that the third parameter ** points to has just been executed. Or, if the fourth parameter is 2, then ** the connection being passed as the second parameter is being closed. The -** third parameter is passed NULL In this case. +** third parameter is passed NULL In this case. An example of using this +** configuration option can be seen in the "test_sqllog.c" source file in +** the canonical SQLite source tree.
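**
** A sketch of a callback with the SQLITE_CONFIG_SQLLOG signature described
** above; it is only invoked when the library is built with
** SQLITE_ENABLE_SQLLOG, and the names here are illustrative.  The fourth
** argument selects the circumstance (1: the SQL in zSql was just executed
** on db, 2: db is being closed and zSql is NULL):
**
**   static void xSqllog(void *pArg, sqlite3 *db, const char *zSql, int eType){
**     if( eType==1 ){ ... }else if( eType==2 ){ ... }
**   }
**
** Registration:  sqlite3_config(SQLITE_CONFIG_SQLLOG, xSqllog, (void*)0);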
    +** +** [[SQLITE_CONFIG_MMAP_SIZE]] +**
    SQLITE_CONFIG_MMAP_SIZE +**
    SQLITE_CONFIG_MMAP_SIZE takes two 64-bit integer (sqlite3_int64) values +** that are the default mmap size limit (the default setting for +** [PRAGMA mmap_size]) and the maximum allowed mmap size limit. +** The default setting can be overridden by each database connection using +** either the [PRAGMA mmap_size] command, or by using the +** [SQLITE_FCNTL_MMAP_SIZE] file control. The maximum allowed mmap size +** cannot be changed at run-time. Nor may the maximum allowed mmap size +** exceed the compile-time maximum mmap size set by the +** [SQLITE_MAX_MMAP_SIZE] compile-time option. +** If either argument to this option is negative, then that argument is +** changed to its compile-time default. ** */ #define SQLITE_CONFIG_SINGLETHREAD 1 /* nil */ @@ -1656,6 +1695,7 @@ struct sqlite3_mem_methods { #define SQLITE_CONFIG_GETPCACHE2 19 /* sqlite3_pcache_methods2* */ #define SQLITE_CONFIG_COVERING_INDEX_SCAN 20 /* int */ #define SQLITE_CONFIG_SQLLOG 21 /* xSqllog, void* */ +#define SQLITE_CONFIG_MMAP_SIZE 22 /* sqlite3_int64, sqlite3_int64 */ /* ** CAPI3REF: Database Connection Configuration Options @@ -2489,6 +2529,9 @@ SQLITE_API int sqlite3_set_authorizer( ** as each triggered subprogram is entered. The callbacks for triggers ** contain a UTF-8 SQL comment that identifies the trigger.)^ ** +** The [SQLITE_TRACE_SIZE_LIMIT] compile-time option can be used to limit +** the length of [bound parameter] expansion in the output of sqlite3_trace(). +** ** ^The callback function registered by sqlite3_profile() is invoked ** as each SQL statement finishes. ^The profile callback contains ** the original statement text and an estimate of wall-clock time @@ -3027,7 +3070,8 @@ SQLITE_API int sqlite3_limit(sqlite3*, int id, int newVal); **
  • ** ^If the database schema changes, instead of returning [SQLITE_SCHEMA] as it ** always used to do, [sqlite3_step()] will automatically recompile the SQL -** statement and try to run it again. +** statement and try to run it again. As many as [SQLITE_MAX_SCHEMA_RETRY] +** retries will occur before sqlite3_step() gives up and returns an error. **
  • ** **
  • @@ -3231,6 +3275,9 @@ typedef struct sqlite3_context sqlite3_context; ** parameter [SQLITE_LIMIT_VARIABLE_NUMBER] (default value: 999). ** ** ^The third argument is the value to bind to the parameter. +** ^If the third parameter to sqlite3_bind_text() or sqlite3_bind_text16() +** or sqlite3_bind_blob() is a NULL pointer then the fourth parameter +** is ignored and the end result is the same as sqlite3_bind_null(). ** ** ^(In those routines that have a fourth argument, its value is the ** number of bytes in the parameter. To be clear: the value is the @@ -4187,7 +4234,7 @@ SQLITE_API void sqlite3_set_auxdata(sqlite3_context*, int N, void*, void (*)(voi ** the content before returning. ** ** The typedef is necessary to work around problems in certain -** C++ compilers. See ticket #2191. +** C++ compilers. */ typedef void (*sqlite3_destructor_type)(void*); #define SQLITE_STATIC ((sqlite3_destructor_type)0) @@ -4986,11 +5033,20 @@ SQLITE_API int sqlite3_table_column_metadata( ** ^This interface loads an SQLite extension library from the named file. ** ** ^The sqlite3_load_extension() interface attempts to load an -** SQLite extension library contained in the file zFile. +** [SQLite extension] library contained in the file zFile. If +** the file cannot be loaded directly, attempts are made to load +** with various operating-system specific extensions added. +** So for example, if "samplelib" cannot be loaded, then names like +** "samplelib.so" or "samplelib.dylib" or "samplelib.dll" might +** be tried also. ** ** ^The entry point is zProc. -** ^zProc may be 0, in which case the name of the entry point -** defaults to "sqlite3_extension_init". +** ^(zProc may be 0, in which case SQLite will try to come up with an +** entry point name on its own. It first tries "sqlite3_extension_init". +** If that does not work, it constructs a name "sqlite3_X_init" where the +** X is consists of the lower-case equivalent of all ASCII alphabetic +** characters in the filename from the last "/" to the first following +** "." and omitting any initial "lib".)^ ** ^The sqlite3_load_extension() interface returns ** [SQLITE_OK] on success and [SQLITE_ERROR] if something goes wrong. ** ^If an error occurs and pzErrMsg is not 0, then the @@ -5016,11 +5072,11 @@ SQLITE_API int sqlite3_load_extension( ** CAPI3REF: Enable Or Disable Extension Loading ** ** ^So as not to open security holes in older applications that are -** unprepared to deal with extension loading, and as a means of disabling -** extension loading while evaluating user-entered SQL, the following API +** unprepared to deal with [extension loading], and as a means of disabling +** [extension loading] while evaluating user-entered SQL, the following API ** is provided to turn the [sqlite3_load_extension()] mechanism on and off. ** -** ^Extension loading is off by default. See ticket #1863. +** ^Extension loading is off by default. ** ^Call the sqlite3_enable_load_extension() routine with onoff==1 ** to turn extension loading on and call it with onoff==0 to turn ** it back off again. @@ -5032,7 +5088,7 @@ SQLITE_API int sqlite3_enable_load_extension(sqlite3 *db, int onoff); ** ** ^This interface causes the xEntryPoint() function to be invoked for ** each new [database connection] that is created. The idea here is that -** xEntryPoint() is the entry point for a statically linked SQLite extension +** xEntryPoint() is the entry point for a statically linked [SQLite extension] ** that is to be automatically loaded into all new database connections. 
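**
** A sketch of loading a run-time loadable extension as described above,
** assuming extension loading has been enabled on the open handle "db" and
** an extension file named "samplelib" (SQLite would then try an entry-point
** name such as sqlite3_samplelib_init):
**
**   char *zErrMsg = 0;
**   int rc = sqlite3_load_extension(db, "samplelib", 0, &zErrMsg);
**   if( rc!=SQLITE_OK ) sqlite3_free(zErrMsg);
**
** The error message, if any, is written into memory obtained from
** sqlite3_malloc() and should be released with sqlite3_free() as shown.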
** ** ^(Even though the function prototype shows that xEntryPoint() takes @@ -6812,10 +6868,25 @@ SQLITE_API int sqlite3_unlock_notify( SQLITE_API int sqlite3_stricmp(const char *, const char *); SQLITE_API int sqlite3_strnicmp(const char *, const char *, int); +/* +** CAPI3REF: String Globbing +* +** ^The [sqlite3_strglob(P,X)] interface returns zero if string X matches +** the glob pattern P, and it returns non-zero if string X does not match +** the glob pattern P. ^The definition of glob pattern matching used in +** [sqlite3_strglob(P,X)] is the same as for the "X GLOB P" operator in the +** SQL dialect used by SQLite. ^The sqlite3_strglob(P,X) function is case +** sensitive. +** +** Note that this routine returns zero on a match and non-zero if the strings +** do not match, the same as [sqlite3_stricmp()] and [sqlite3_strnicmp()]. +*/ +SQLITE_API int sqlite3_strglob(const char *zGlob, const char *zStr); + /* ** CAPI3REF: Error Logging Interface ** -** ^The [sqlite3_log()] interface writes a message into the error log +** ^The [sqlite3_log()] interface writes a message into the [error log] ** established by the [SQLITE_CONFIG_LOG] option to [sqlite3_config()]. ** ^If logging is enabled, the zFormat string and subsequent arguments are ** used with [sqlite3_snprintf()] to generate the final output string. From df2841458d43b71d67082cbb765411d06f8ad81a Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Fri, 5 Jul 2013 02:00:14 -0400 Subject: [PATCH 042/118] Large overhaul in name and appearance for file analysis. --- scripts/base/files/extract/__load__.bro | 1 + scripts/base/files/extract/main.bro | 38 +++ scripts/base/files/hash/main.bro | 16 +- .../{file-analysis => files}/__load__.bro | 0 .../{file-analysis => files}/main.bro | 231 +++++++++++------- scripts/base/init-bare.bro | 4 +- scripts/base/init-default.bro | 1 + scripts/base/protocols/ftp/__load__.bro | 1 - scripts/base/protocols/ftp/file-analysis.bro | 37 +-- scripts/base/protocols/ftp/file-extract.bro | 90 ------- scripts/base/protocols/http/__load__.bro | 6 +- scripts/base/protocols/http/file-analysis.bro | 69 +++--- scripts/base/protocols/http/file-extract.bro | 100 -------- scripts/base/protocols/http/file-hash.bro | 68 ------ scripts/base/protocols/http/file-ident.bro | 105 -------- scripts/base/protocols/http/main.bro | 18 +- scripts/base/protocols/irc/__load__.bro | 2 +- scripts/base/protocols/irc/dcc-send.bro | 108 +------- scripts/base/protocols/irc/file-analysis.bro | 18 +- scripts/base/protocols/smtp/__load__.bro | 2 +- scripts/base/protocols/smtp/entities.bro | 212 ++-------------- scripts/base/protocols/smtp/file-analysis.bro | 15 +- .../policy/frameworks/files/detect-MHR.bro | 63 +++++ .../frameworks/files/hash-all-files.bro | 7 + .../frameworks/intel/smtp-url-extraction.bro | 2 +- .../protocols/smtp/entities-excerpt.bro | 0 scripts/site/local.bro | 11 +- src/const.bif | 2 +- src/event.bif | 10 +- src/file_analysis.bif | 22 +- src/file_analysis/Analyzer.h | 4 +- src/file_analysis/AnalyzerSet.cc | 2 +- src/file_analysis/DataEvent.cc | 2 +- src/file_analysis/Extract.cc | 2 +- src/file_analysis/Manager.cc | 4 +- .../file-analysis/bifs/remove_action.bro | 4 +- .../bifs/set_timeout_interval.bro | 2 +- .../frameworks/file-analysis/bifs/stop.bro | 2 +- testing/scripts/file-analysis-test.bro | 14 +- 39 files changed, 420 insertions(+), 875 deletions(-) create mode 100644 scripts/base/files/extract/__load__.bro create mode 100644 scripts/base/files/extract/main.bro rename scripts/base/frameworks/{file-analysis => 
files}/__load__.bro (100%) rename scripts/base/frameworks/{file-analysis => files}/main.bro (52%) delete mode 100644 scripts/base/protocols/ftp/file-extract.bro delete mode 100644 scripts/base/protocols/http/file-extract.bro delete mode 100644 scripts/base/protocols/http/file-hash.bro delete mode 100644 scripts/base/protocols/http/file-ident.bro create mode 100644 scripts/policy/frameworks/files/detect-MHR.bro create mode 100644 scripts/policy/frameworks/files/hash-all-files.bro rename scripts/{base => policy}/protocols/smtp/entities-excerpt.bro (100%) diff --git a/scripts/base/files/extract/__load__.bro b/scripts/base/files/extract/__load__.bro new file mode 100644 index 0000000000..d551be57d3 --- /dev/null +++ b/scripts/base/files/extract/__load__.bro @@ -0,0 +1 @@ +@load ./main \ No newline at end of file diff --git a/scripts/base/files/extract/main.bro b/scripts/base/files/extract/main.bro new file mode 100644 index 0000000000..70e61c8529 --- /dev/null +++ b/scripts/base/files/extract/main.bro @@ -0,0 +1,38 @@ +@load base/frameworks/files +@load base/utils/paths + +module FileExtract; + +export { + ## The prefix where files are extracted to. + const prefix = "./extract_files/" &redef; + + redef record Files::Info += { + ## Local filenames of extracted file. + extracted: string &optional &log; + }; + + redef record Files::AnalyzerArgs += { + ## The local filename to which to write an extracted file. + ## This field is used in the core by the extraction plugin + ## to know where to write the file to. It's also optional + extract_filename: string &optional; + }; +} + +function on_add(f: fa_file, args: Files::AnalyzerArgs) + { + if ( ! args?$extract_filename ) + args$extract_filename = cat("extract-", f$source, "-", f$id); + + f$info$extracted = args$extract_filename; + args$extract_filename = build_path_compressed(prefix, args$extract_filename); + } + +event bro_init() &priority=10 + { + Files::register_analyzer_add_callback(Files::ANALYZER_EXTRACT, on_add); + + # Create the extraction directory. + mkdir(prefix); + } \ No newline at end of file diff --git a/scripts/base/files/hash/main.bro b/scripts/base/files/hash/main.bro index cd50d6b291..926e39865a 100644 --- a/scripts/base/files/hash/main.bro +++ b/scripts/base/files/hash/main.bro @@ -1,13 +1,23 @@ +@load base/frameworks/files -module FilesHash; +module FileHash; export { - + redef record Files::Info += { + ## An MD5 digest of the file contents. + md5: string &log &optional; + + ## A SHA1 digest of the file contents. + sha1: string &log &optional; + + ## A SHA256 digest of the file contents. 
+ sha256: string &log &optional; + }; + } event file_hash(f: fa_file, kind: string, hash: string) &priority=5 { - set_info(f); switch ( kind ) { case "md5": f$info$md5 = hash; diff --git a/scripts/base/frameworks/file-analysis/__load__.bro b/scripts/base/frameworks/files/__load__.bro similarity index 100% rename from scripts/base/frameworks/file-analysis/__load__.bro rename to scripts/base/frameworks/files/__load__.bro diff --git a/scripts/base/frameworks/file-analysis/main.bro b/scripts/base/frameworks/files/main.bro similarity index 52% rename from scripts/base/frameworks/file-analysis/main.bro rename to scripts/base/frameworks/files/main.bro index 7b1bd7d81c..1c0481a87c 100644 --- a/scripts/base/frameworks/file-analysis/main.bro +++ b/scripts/base/frameworks/files/main.bro @@ -3,8 +3,9 @@ @load base/file_analysis.bif @load base/frameworks/logging +@load base/utils/site -module FileAnalysis; +module Files; export { redef enum Log::ID += { @@ -14,21 +15,14 @@ export { ## A structure which represents a desired type of file analysis. type AnalyzerArgs: record { - ## The type of analysis. - tag: Analyzer; - - ## The local filename to which to write an extracted file. Must be - ## set when *tag* is :bro:see:`FileAnalysis::ANALYZER_EXTRACT`. - extract_filename: string &optional; - ## An event which will be generated for all new file contents, ## chunk-wise. Used when *tag* is - ## :bro:see:`FileAnalysis::ANALYZER_DATA_EVENT`. + ## :bro:see:`Files::ANALYZER_DATA_EVENT`. chunk_event: event(f: fa_file, data: string, off: count) &optional; ## An event which will be generated for all new file contents, ## stream-wise. Used when *tag* is - ## :bro:see:`FileAnalysis::ANALYZER_DATA_EVENT`. + ## :bro:see:`Files::ANALYZER_DATA_EVENT`. stream_event: event(f: fa_file, data: string) &optional; } &redef; @@ -40,23 +34,52 @@ export { ts: time &log; ## An identifier associated with a single file. - id: string &log; + fuid: string &log; - ## Identifier associated with a container file from which this one was - ## extracted as part of the file analysis. - parent_id: string &log &optional; + ## If this file was transferred over a network + ## connection this should show the host or hosts that + ## the data sourced from. + tx_hosts: set[addr] &log; + + ## If this file was transferred over a network + ## connection this should show the host or hosts that + ## the data traveled to. + rx_hosts: set[addr] &log; + + ## Connection UIDS over which the file was transferred. + conn_uids: set[string] &log; ## An identification of the source of the file data. E.g. it may be ## a network protocol over which it was transferred, or a local file ## path which was read, or some other input source. source: string &log &optional; - ## If the source of this file is is a network connection, this field - ## may be set to indicate the directionality. - is_orig: bool &log &optional; + ## A value to represent the depth of this file in relation + ## to its source. In SMTP, it is the depth of the MIME + ## attachment on the message. In HTTP, it is the depth of the + ## request within the TCP connection. + depth: count &default=0 &log; - ## The time at which the last activity for the file was seen. - last_active: time &log; + ## A set of analysis types done during the file analysis. + analyzers: set[Analyzer] &log; + + ## A mime type provided by libmagic against the *bof_buffer*, or + ## in the cases where no buffering of the beginning of file occurs, + ## an initial guess of the mime type based on the first data seen. 
+ mime_type: string &log &optional; + + ## A filename for the file if one is available from the source + ## for the file. These will frequently come from + ## "Content-Disposition" headers in network protocols. + filename: string &log &optional; + + ## The duration the file was analyzed for. + duration: interval &log &default=0secs; + + ## If the source of this file is is a network connection, this field + ## indicates if the data originated from the local network or not as + ## determined by the configured bro:see:`Site::local_nets`. + local_orig: bool &log &optional; ## Number of bytes provided to the file analysis engine for the file. seen_bytes: count &log &default=0; @@ -72,49 +95,18 @@ export { ## were delivered to file analyzers due to reassembly buffer overflow. overflow_bytes: count &log &default=0; - ## The amount of time between receiving new data for this file that - ## the analysis engine will wait before giving up on it. - timeout_interval: interval &log &optional; - - ## The number of bytes at the beginning of a file to save for later - ## inspection in *bof_buffer* field. - bof_buffer_size: count &log &optional; - - ## A mime type provided by libmagic against the *bof_buffer*, or - ## in the cases where no buffering of the beginning of file occurs, - ## an initial guess of the mime type based on the first data seen. - mime_type: string &log &optional; - ## Whether the file analysis timed out at least once for the file. timedout: bool &log &default=F; - ## Connection UIDS over which the file was transferred. - conn_uids: set[string] &log; - - ## A set of analysis types done during the file analysis. - analyzers: set[Analyzer]; - - ## Local filenames of extracted files. - extracted_files: set[string] &log; - - ## An MD5 digest of the file contents. - md5: string &log &optional; - - ## A SHA1 digest of the file contents. - sha1: string &log &optional; - - ## A SHA256 digest of the file contents. - sha256: string &log &optional; + ## Identifier associated with a container file from which this one was + ## extracted as part of the file analysis. + parent_fuid: string &log &optional; } &redef; ## A table that can be used to disable file analysis completely for ## any files transferred over given network protocol analyzers. const disable: table[AnalyzerTag] of bool = table() &redef; - ## Event that can be handled to access the Info record as it is sent on - ## to the logging framework. - global log_file_analysis: event(rec: Info); - ## The salt concatenated to unique file handle strings generated by ## :bro:see:`get_file_handle` before hashing them in to a file id ## (the *id* field of :bro:see:`fa_file`). @@ -146,7 +138,9 @@ export { ## Returns: true if the analyzer will be added, or false if analysis ## for the *id* isn't currently active or the *args* ## were invalid for the analyzer type. - global add_analyzer: function(f: fa_file, args: AnalyzerArgs): bool; + global add_analyzer: function(f: fa_file, + tag: Files::Analyzer, + args: AnalyzerArgs &default=AnalyzerArgs()): bool; ## Removes an analyzer from the analysis of a given file. ## @@ -156,7 +150,7 @@ export { ## ## Returns: true if the analyzer will be removed, or false if analysis ## for the *id* isn't currently active. - global remove_analyzer: function(f: fa_file, args: AnalyzerArgs): bool; + global remove_analyzer: function(f: fa_file, tag: Files::Analyzer, args: AnalyzerArgs): bool; ## Stops/ignores any further analysis of a given file. 
## @@ -166,45 +160,75 @@ export { ## rest of it's contents, or false if analysis for the *id* ## isn't currently active. global stop: function(f: fa_file): bool; + + ## Register callbacks for protocols that work with the Files framework. + ## The callbacks must uniquely identify a file and each protocol can + ## only have a single callback registered for it. + ## + ## tag: Tag for the protocol analyzer having a callback being registered. + ## + ## callback: Function that can generate a file handle for the protocol analyzer + ## defined previously. + ## + ## Returns: true if the protocol being registered was not previously registered. + global register_protocol: function(tag: AnalyzerTag, callback: function(c: connection, is_orig: bool): string): bool; + + ## Register a callback for file analyzers to use if they need to do some manipulation + ## when they are being added to a file before the core code takes over. This is + ## unlikely to be interesting for users and should only be called by file analyzer + ## authors but it *not required*. + ## + ## tag: Tag for the file analyzer. + ## + ## callback: Function to execute when the given file analyzer is being added. + global register_analyzer_add_callback: function(tag: Files::Analyzer, callback: function(f: fa_file, args: AnalyzerArgs)); + + ## Event that can be handled to access the Info record as it is sent on + ## to the logging framework. + global log_files: event(rec: Info); } redef record fa_file += { info: Info &optional; }; +redef record AnalyzerArgs += { + # This is used interally for the core file analyzer api. + tag: Files::Analyzer &optional; +}; + +# Store the callbacks for protocol analyzers that have files. +global registered_protocols: table[AnalyzerTag] of function(c: connection, is_orig: bool): string = table() + &default=function(c: connection, is_orig: bool): string { return cat(c$uid, is_orig); }; + +global analyzer_add_callbacks: table[Files::Analyzer] of function(f: fa_file, args: AnalyzerArgs) = table(); + +event bro_init() &priority=5 + { + Log::create_stream(Files::LOG, [$columns=Info, $ev=log_files]); + } + function set_info(f: fa_file) { if ( ! f?$info ) { - local tmp: Info = Info($ts=network_time()); + local tmp: Info = Info($ts=f$last_active, + $fuid=f$id); f$info = tmp; } - f$info$ts = network_time(); - f$info$id = f$id; if ( f?$parent_id ) - f$info$parent_id = f$parent_id; + f$info$parent_fuid = f$parent_id; if ( f?$source ) f$info$source = f$source; - if ( f?$is_orig ) - f$info$is_orig = f$is_orig; - f$info$last_active = f$last_active; + f$info$duration = f$last_active - f$info$ts; f$info$seen_bytes = f$seen_bytes; if ( f?$total_bytes ) f$info$total_bytes = f$total_bytes; f$info$missing_bytes = f$missing_bytes; f$info$overflow_bytes = f$overflow_bytes; - f$info$timeout_interval = f$timeout_interval; - f$info$bof_buffer_size = f$bof_buffer_size; if ( f?$mime_type ) f$info$mime_type = f$mime_type; - if ( f?$conns ) - { - for ( cid in f$conns ) - { - add f$info$conn_uids[f$conns[cid]$uid]; - } - } } function set_timeout_interval(f: fa_file, t: interval): bool @@ -212,21 +236,31 @@ function set_timeout_interval(f: fa_file, t: interval): bool return __set_timeout_interval(f$id, t); } -function add_analyzer(f: fa_file, args: AnalyzerArgs): bool +function add_analyzer(f: fa_file, tag: Analyzer, args: AnalyzerArgs): bool { - if ( ! __add_analyzer(f$id, args) ) return F; + # This is to construct the correct args for the core API. 
+ args$tag = tag; + add f$info$analyzers[tag]; - set_info(f); - add f$info$analyzers[args$tag]; - - if ( args$tag == FileAnalysis::ANALYZER_EXTRACT ) - add f$info$extracted_files[args$extract_filename]; + if ( tag in analyzer_add_callbacks ) + analyzer_add_callbacks[tag](f, args); + if ( ! __add_analyzer(f$id, args) ) + { + Reporter::warning(fmt("Analyzer %s not added successfully to file %s.", tag, f$id)); + return F; + } return T; } -function remove_analyzer(f: fa_file, args: AnalyzerArgs): bool +function register_analyzer_add_callback(tag: Files::Analyzer, callback: function(f: fa_file, args: AnalyzerArgs)) { + analyzer_add_callbacks[tag] = callback; + } + +function remove_analyzer(f: fa_file, tag: Files::Analyzer, args: AnalyzerArgs): bool + { + args$tag = tag; return __remove_analyzer(f$id, args); } @@ -235,25 +269,48 @@ function stop(f: fa_file): bool return __stop(f$id); } -event bro_init() &priority=5 +event file_new(f: fa_file) &priority=10 { - Log::create_stream(FileAnalysis::LOG, - [$columns=Info, $ev=log_file_analysis]); + set_info(f); } -event file_timeout(f: fa_file) &priority=5 +event file_over_new_connection(f: fa_file, c: connection) &priority=10 + { + set_info(f); + add f$info$conn_uids[c$uid]; + local cid = c$id; + add f$info$tx_hosts[f$is_orig ? cid$orig_h : cid$resp_h]; + if( |Site::local_nets| > 0 ) + f$info$local_orig=Site::is_local_addr(f$is_orig ? cid$orig_h : cid$resp_h); + + add f$info$rx_hosts[f$is_orig ? cid$resp_h : cid$orig_h]; + } + +event file_timeout(f: fa_file) &priority=10 { set_info(f); f$info$timedout = T; } - -event file_state_remove(f: fa_file) &priority=5 +event file_state_remove(f: fa_file) &priority=10 { set_info(f); } -event file_state_remove(f: fa_file) &priority=-5 +event file_state_remove(f: fa_file) &priority=-10 { - Log::write(FileAnalysis::LOG, f$info); + Log::write(Files::LOG, f$info); + } + +function register_protocol(tag: AnalyzerTag, callback: function(c: connection, is_orig: bool): string): bool + { + local result = (tag !in registered_protocols); + registered_protocols[tag] = callback; + return result; + } + +event get_file_handle(tag: AnalyzerTag, c: connection, is_orig: bool) &priority=5 + { + local handler = registered_protocols[tag]; + set_file_handle(handler(c, is_orig)); } diff --git a/scripts/base/init-bare.bro b/scripts/base/init-bare.bro index c4245d9052..4e1a5248c8 100644 --- a/scripts/base/init-bare.bro +++ b/scripts/base/init-bare.bro @@ -339,7 +339,7 @@ type fa_file: record { ## An identification of the source of the file data. E.g. it may be ## a network protocol over which it was transferred, or a local file ## path which was read, or some other input source. - source: string &optional; + source: string; ## If the source of this file is is a network connection, this field ## may be set to indicate the directionality. 
@@ -3101,4 +3101,4 @@ const snaplen = 8192 &redef; @load base/frameworks/input -@load base/frameworks/file-analysis +@load base/frameworks/files diff --git a/scripts/base/init-default.bro b/scripts/base/init-default.bro index 03ba474e0b..719842af09 100644 --- a/scripts/base/init-default.bro +++ b/scripts/base/init-default.bro @@ -47,5 +47,6 @@ @load base/protocols/syslog @load base/files/hash +@load base/files/extract @load base/misc/find-checksum-offloading diff --git a/scripts/base/protocols/ftp/__load__.bro b/scripts/base/protocols/ftp/__load__.bro index 464571dc7d..9c839610ac 100644 --- a/scripts/base/protocols/ftp/__load__.bro +++ b/scripts/base/protocols/ftp/__load__.bro @@ -1,5 +1,4 @@ @load ./utils-commands @load ./main @load ./file-analysis -@load ./file-extract @load ./gridftp diff --git a/scripts/base/protocols/ftp/file-analysis.bro b/scripts/base/protocols/ftp/file-analysis.bro index f8fa2d816b..3710a44cee 100644 --- a/scripts/base/protocols/ftp/file-analysis.bro +++ b/scripts/base/protocols/ftp/file-analysis.bro @@ -1,6 +1,6 @@ @load ./main @load base/utils/conn-ids -@load base/frameworks/file-analysis/main +@load base/frameworks/files module FTP; @@ -9,40 +9,15 @@ export { global get_file_handle: function(c: connection, is_orig: bool): string; } -function get_handle_string(c: connection): string - { - return cat(ANALYZER_FTP_DATA, " ", c$start_time, " ", id_string(c$id)); - } - function get_file_handle(c: connection, is_orig: bool): string { - if ( [c$id$resp_h, c$id$resp_p] !in ftp_data_expected ) return ""; + if ( [c$id$resp_h, c$id$resp_p] !in ftp_data_expected ) + return ""; - local info: FTP::Info = ftp_data_expected[c$id$resp_h, c$id$resp_p]; - - if ( info$passive ) - # FTP client initiates data channel. - if ( is_orig ) - # Don't care about FTP client data. - return ""; - else - # Do care about FTP server data. - return get_handle_string(c); - else - # FTP server initiates dta channel. - if ( is_orig ) - # Do care about FTP server data. - return get_handle_string(c); - else - # Don't care about FTP client data. - return ""; + return cat(ANALYZER_FTP_DATA, c$start_time, c$id, is_orig); } -module GLOBAL; - -event get_file_handle(tag: AnalyzerTag, c: connection, is_orig: bool) - &priority=5 +event bro_init() &priority=5 { - if ( tag != ANALYZER_FTP_DATA ) return; - set_file_handle(FTP::get_file_handle(c, is_orig)); + Files::register_protocol(ANALYZER_FTP_DATA, FTP::get_file_handle); } diff --git a/scripts/base/protocols/ftp/file-extract.bro b/scripts/base/protocols/ftp/file-extract.bro deleted file mode 100644 index 2b7bb8cd50..0000000000 --- a/scripts/base/protocols/ftp/file-extract.bro +++ /dev/null @@ -1,90 +0,0 @@ -##! File extraction support for FTP. - -@load ./main -@load base/utils/files - -module FTP; - -export { - ## Pattern of file mime types to extract from FTP transfers. - const extract_file_types = /NO_DEFAULT/ &redef; - - ## The on-disk prefix for files to be extracted from FTP-data transfers. - const extraction_prefix = "ftp-item" &redef; -} - -redef record Info += { - ## On disk file where it was extracted to. - extraction_file: string &log &optional; - - ## Indicates if the current command/response pair should attempt to - ## extract the file if a file was transferred. - extract_file: bool &default=F; -}; - -function get_extraction_name(f: fa_file): string - { - local r = fmt("%s-%s.dat", extraction_prefix, f$id); - return r; - } - -event file_new(f: fa_file) &priority=5 - { - if ( ! 
f?$source ) return; - if ( f$source != "FTP_DATA" ) return; - - if ( f?$mime_type && extract_file_types in f$mime_type ) - { - FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT, - $extract_filename=get_extraction_name(f)]); - return; - } - - if ( ! f?$conns ) return; - - for ( cid in f$conns ) - { - local c: connection = f$conns[cid]; - - if ( [cid$resp_h, cid$resp_p] !in ftp_data_expected ) next; - - local s = ftp_data_expected[cid$resp_h, cid$resp_p]; - - if ( ! s$extract_file ) next; - - FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT, - $extract_filename=get_extraction_name(f)]); - return; - } - } - -event file_state_remove(f: fa_file) &priority=4 - { - if ( ! f?$source ) return; - if ( f$source != "FTP_DATA" ) return; - if ( ! f?$info ) return; - - for ( filename in f$info$extracted_files ) - { - local s: FTP::Info; - s$ts = network_time(); - s$tags = set(); - s$user = ""; - s$extraction_file = filename; - - if ( f?$conns ) - for ( cid in f$conns ) - { - s$uid = f$conns[cid]$uid; - s$id = cid; - } - - Log::write(FTP::LOG, s); - } - } - -event log_ftp(rec: Info) &priority=-10 - { - delete rec$extraction_file; - delete rec$extract_file; - } diff --git a/scripts/base/protocols/http/__load__.bro b/scripts/base/protocols/http/__load__.bro index 58618dedc7..585b815eed 100644 --- a/scripts/base/protocols/http/__load__.bro +++ b/scripts/base/protocols/http/__load__.bro @@ -1,6 +1,6 @@ @load ./main @load ./utils @load ./file-analysis -@load ./file-ident -@load ./file-hash -@load ./file-extract +#@load ./file-ident +#@load ./file-hash +#@load ./file-extract diff --git a/scripts/base/protocols/http/file-analysis.bro b/scripts/base/protocols/http/file-analysis.bro index 769bb509f5..b79ca041b8 100644 --- a/scripts/base/protocols/http/file-analysis.bro +++ b/scripts/base/protocols/http/file-analysis.bro @@ -1,53 +1,58 @@ @load ./main @load ./utils @load base/utils/conn-ids -@load base/frameworks/file-analysis/main +@load base/frameworks/files module HTTP; export { - redef record HTTP::Info += { - ## Number of MIME entities in the HTTP request message body so far. - request_mime_level: count &default=0; - ## Number of MIME entities in the HTTP response message body so far. - response_mime_level: count &default=0; + redef record Info += { + ## The sniffed mime type of the data being sent by the client. + client_mime_type: string &log &optional; + + ## The sniffed mime type of the data being returned by the server. + mime_type: string &log &optional; }; ## Default file handle provider for HTTP. global get_file_handle: function(c: connection, is_orig: bool): string; } -event http_begin_entity(c: connection, is_orig: bool) &priority=5 - { - if ( ! c?$http ) return; - - if ( is_orig ) - ++c$http$request_mime_level; - else - ++c$http$response_mime_level; - } - function get_file_handle(c: connection, is_orig: bool): string { - if ( ! c?$http ) return ""; - - local mime_level: count = - is_orig ? c$http$request_mime_level : c$http$response_mime_level; - local mime_level_str: string = mime_level > 1 ? cat(mime_level) : ""; + if ( ! c?$http ) + return ""; + local mime_depth = is_orig ? 
c$http$orig_mime_depth : c$http$resp_mime_depth; if ( c$http$range_request ) - return cat(ANALYZER_HTTP, " ", is_orig, " ", c$id$orig_h, " ", - build_url(c$http)); - - return cat(ANALYZER_HTTP, " ", c$start_time, " ", is_orig, " ", - c$http$trans_depth, mime_level_str, " ", id_string(c$id)); + { + return cat(ANALYZER_HTTP, is_orig, c$id$orig_h, mime_depth, build_url(c$http)); + } + else + { + return cat(ANALYZER_HTTP, c$start_time, is_orig, + c$http$trans_depth, mime_depth, id_string(c$id)); + } } -module GLOBAL; - -event get_file_handle(tag: AnalyzerTag, c: connection, is_orig: bool) - &priority=5 +event bro_init() &priority=5 { - if ( tag != ANALYZER_HTTP ) return; - set_file_handle(HTTP::get_file_handle(c, is_orig)); + Files::register_protocol(ANALYZER_HTTP, HTTP::get_file_handle); } + +event file_over_new_connection(f: fa_file, c: connection) &priority=5 + { + if ( c?$http ) + { + #if (!f?$mime_type) + # print f; +# + #if ( f$is_orig ) + # c$http$client_mime_type = f$mime_type; + #else + # c$http$mime_type = f$mime_type; + + if ( c$http?$filename ) + f$info$filename = c$http$filename; + } + } \ No newline at end of file diff --git a/scripts/base/protocols/http/file-extract.bro b/scripts/base/protocols/http/file-extract.bro deleted file mode 100644 index a8c6039395..0000000000 --- a/scripts/base/protocols/http/file-extract.bro +++ /dev/null @@ -1,100 +0,0 @@ -##! Extracts the items from HTTP traffic, one per file. At this time only -##! the message body from the server can be extracted with this script. - -@load ./main -@load ./file-analysis - -module HTTP; - -export { - ## Pattern of file mime types to extract from HTTP response entity bodies. - const extract_file_types = /NO_DEFAULT/ &redef; - - ## The on-disk prefix for files to be extracted from HTTP entity bodies. - const extraction_prefix = "http-item" &redef; - - redef record Info += { - ## On-disk location where files in request body were extracted. - extracted_request_files: vector of string &log &optional; - - ## On-disk location where files in response body were extracted. - extracted_response_files: vector of string &log &optional; - - ## Indicates if the response body is to be extracted or not. Must be - ## set before or by the first :bro:see:`file_new` for the file content. - extract_file: bool &default=F; - }; -} - -function get_extraction_name(f: fa_file): string - { - local r = fmt("%s-%s.dat", extraction_prefix, f$id); - return r; - } - -function add_extraction_file(c: connection, is_orig: bool, fn: string) - { - if ( is_orig ) - { - if ( ! c$http?$extracted_request_files ) - c$http$extracted_request_files = vector(); - c$http$extracted_request_files[|c$http$extracted_request_files|] = fn; - } - else - { - if ( ! c$http?$extracted_response_files ) - c$http$extracted_response_files = vector(); - c$http$extracted_response_files[|c$http$extracted_response_files|] = fn; - } - } - -event file_new(f: fa_file) &priority=5 - { - if ( ! f?$source ) return; - if ( f$source != "HTTP" ) return; - if ( ! f?$conns ) return; - - local fname: string; - local c: connection; - - if ( f?$mime_type && extract_file_types in f$mime_type ) - { - fname = get_extraction_name(f); - FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT, - $extract_filename=fname]); - - for ( cid in f$conns ) - { - c = f$conns[cid]; - if ( ! c?$http ) next; - add_extraction_file(c, f$is_orig, fname); - } - - return; - } - - local extracting: bool = F; - - for ( cid in f$conns ) - { - c = f$conns[cid]; - - if ( ! c?$http ) next; - - if ( ! 
c$http$extract_file ) next; - - fname = get_extraction_name(f); - FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT, - $extract_filename=fname]); - extracting = T; - break; - } - - if ( extracting ) - for ( cid in f$conns ) - { - c = f$conns[cid]; - if ( ! c?$http ) next; - add_extraction_file(c, f$is_orig, fname); - } - } diff --git a/scripts/base/protocols/http/file-hash.bro b/scripts/base/protocols/http/file-hash.bro deleted file mode 100644 index 34d91e45bb..0000000000 --- a/scripts/base/protocols/http/file-hash.bro +++ /dev/null @@ -1,68 +0,0 @@ -##! Calculate hashes for HTTP body transfers. - -@load ./main -@load ./file-analysis - -module HTTP; - -export { - redef record Info += { - ## MD5 sum for a file transferred over HTTP calculated from the - ## response body. - md5: string &log &optional; - - ## This value can be set per-transfer to determine per request - ## if a file should have an MD5 sum generated. It must be - ## set to T at the time of or before the first chunk of body data. - calc_md5: bool &default=F; - }; - - ## Generate MD5 sums for these filetypes. - const generate_md5 = /application\/x-dosexec/ # Windows and DOS executables - | /application\/x-executable/ # *NIX executable binary - &redef; -} - -event file_new(f: fa_file) &priority=5 - { - if ( ! f?$source ) return; - if ( f$source != "HTTP" ) return; - - if ( f?$mime_type && generate_md5 in f$mime_type ) - { - FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]); - return; - } - - if ( ! f?$conns ) return; - - for ( cid in f$conns ) - { - local c: connection = f$conns[cid]; - - if ( ! c?$http ) next; - - if ( ! c$http$calc_md5 ) next; - - FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]); - return; - } - } - -event file_state_remove(f: fa_file) &priority=4 - { - if ( ! f?$source ) return; - if ( f$source != "HTTP" ) return; - if ( ! f?$conns ) return; - if ( ! f?$info ) return; - if ( ! f$info?$md5 ) return; - - for ( cid in f$conns ) - { - local c: connection = f$conns[cid]; - - if ( ! c?$http ) next; - - c$http$md5 = f$info$md5; - } - } diff --git a/scripts/base/protocols/http/file-ident.bro b/scripts/base/protocols/http/file-ident.bro deleted file mode 100644 index 7ed4b58a37..0000000000 --- a/scripts/base/protocols/http/file-ident.bro +++ /dev/null @@ -1,105 +0,0 @@ -##! Identification of file types in HTTP response bodies with file content sniffing. - -@load base/frameworks/notice -@load ./main -@load ./utils -@load ./file-analysis - -module HTTP; - -export { - redef enum Notice::Type += { - ## Indicates when the file extension doesn't seem to match the file - ## contents. - Incorrect_File_Type, - }; - - redef record Info += { - ## Mime type of response body identified by content sniffing. - mime_type: string &log &optional; - }; - - ## Mapping between mime type strings (without character set) and - ## regular expressions for URLs. - ## The :bro:enum:`HTTP::Incorrect_File_Type` notice is generated if the - ## pattern doesn't match the mime type that was discovered. - const mime_types_extensions: table[string] of pattern = { - ["application/x-dosexec"] = /\.([eE][xX][eE]|[dD][lL][lL])/, - } &redef; - - ## A pattern for filtering out :bro:enum:`HTTP::Incorrect_File_Type` urls - ## that are not noteworthy before a notice is created. Each - ## pattern added should match the complete URL (the matched URLs include - ## "http://" at the beginning). - const ignored_incorrect_file_type_urls = /^$/ &redef; -} - -event file_new(f: fa_file) &priority=5 - { - if ( ! 
f?$source ) return; - if ( f$source != "HTTP" ) return; - if ( ! f?$mime_type ) return; - if ( ! f?$conns ) return; - - for ( cid in f$conns ) - { - local c: connection = f$conns[cid]; - - if ( ! c?$http ) next; - - c$http$mime_type = f$mime_type; - - local mime_str: string = c$http$mime_type; - - if ( mime_str !in mime_types_extensions ) next; - if ( ! c$http?$uri ) next; - if ( mime_types_extensions[mime_str] in c$http$uri ) next; - - local url = build_url_http(c$http); - - if ( url == ignored_incorrect_file_type_urls ) next; - - local message = fmt("%s %s %s", mime_str, c$http$method, url); - NOTICE([$note=Incorrect_File_Type, - $msg=message, - $conn=c]); - } - } - -event file_over_new_connection(f: fa_file, c: connection) &priority=5 - { - if ( ! f?$source ) return; - if ( f$source != "HTTP" ) return; - if ( ! f?$mime_type ) return; - if ( ! c?$http ) return; - - # Spread the mime around (e.g. for partial content, file_type event only - # happens once for the first connection, but if there's subsequent - # connections to transfer the same file, they'll be lacking the mime_type - # field if we don't do this). - c$http$mime_type = f$mime_type; - } - -# Tracks byte-range request / partial content response mime types, indexed -# by [connection, uri] pairs. This is needed because a person can pipeline -# byte-range requests over multiple connections to the same uri. Without -# the tracking, only the first request in the pipeline for each connection -# would get a mime_type field assigned to it (by the FileAnalysis policy hooks). -global partial_types: table[conn_id, string] of string &read_expire=5mins; - -# Priority 4 so that it runs before the handler that will write to http.log. -event http_message_done(c: connection, is_orig: bool, stat: http_message_stat) - &priority=4 - { - if ( ! c$http$range_request ) return; - if ( ! c$http?$uri ) return; - - if ( c$http?$mime_type ) - { - partial_types[c$id, c$http$uri] = c$http$mime_type; - return; - } - - if ( [c$id, c$http$uri] in partial_types ) - c$http$mime_type = partial_types[c$id, c$http$uri]; - } diff --git a/scripts/base/protocols/http/main.bro b/scripts/base/protocols/http/main.bro index a1771c8e77..ebf412d36e 100644 --- a/scripts/base/protocols/http/main.bro +++ b/scripts/base/protocols/http/main.bro @@ -71,10 +71,14 @@ export { ## All of the headers that may indicate if the request was proxied. proxied: set[string] &log &optional; - + ## Indicates if this request can assume 206 partial content in ## response. - range_request: bool &default=F; + range_request: bool &default=F; + ## Number of MIME entities in the HTTP request message body so far. + orig_mime_depth: count &default=0; + ## Number of MIME entities in the HTTP response message body so far. 
+ resp_mime_depth: count &default=0; }; ## Structure to maintain state for an HTTP connection with multiple @@ -283,6 +287,16 @@ event http_header(c: connection, is_orig: bool, name: string, value: string) &pr } } +event http_begin_entity(c: connection, is_orig: bool) &priority=5 + { + set_state(c, F, is_orig); + + if ( is_orig ) + ++c$http$orig_mime_depth; + else + ++c$http$resp_mime_depth; + } + event http_message_done(c: connection, is_orig: bool, stat: http_message_stat) &priority = 5 { set_state(c, F, is_orig); diff --git a/scripts/base/protocols/irc/__load__.bro b/scripts/base/protocols/irc/__load__.bro index 5123385b0c..d20550c54f 100644 --- a/scripts/base/protocols/irc/__load__.bro +++ b/scripts/base/protocols/irc/__load__.bro @@ -1,3 +1,3 @@ @load ./main -@load ./dcc-send +#@load ./dcc-send @load ./file-analysis diff --git a/scripts/base/protocols/irc/dcc-send.bro b/scripts/base/protocols/irc/dcc-send.bro index 53381d0302..afe01485a2 100644 --- a/scripts/base/protocols/irc/dcc-send.bro +++ b/scripts/base/protocols/irc/dcc-send.bro @@ -15,12 +15,6 @@ module IRC; export { - ## Pattern of file mime types to extract from IRC DCC file transfers. - const extract_file_types = /NO_DEFAULT/ &redef; - - ## On-disk prefix for files to be extracted from IRC DCC file transfers. - const extraction_prefix = "irc-dcc-item" &redef; - redef record Info += { ## DCC filename requested. dcc_file_name: string &log &optional; @@ -28,101 +22,10 @@ export { dcc_file_size: count &log &optional; ## Sniffed mime type of the file. dcc_mime_type: string &log &optional; - - ## The file handle for the file to be extracted - extraction_file: string &log &optional; - - ## A boolean to indicate if the current file transfer should be extracted. - extract_file: bool &default=F; }; } -global dcc_expected_transfers: table[addr, port] of Info &read_expire=5mins; - -function set_dcc_mime(f: fa_file) - { - if ( ! f?$conns ) return; - - for ( cid in f$conns ) - { - local c: connection = f$conns[cid]; - - if ( [cid$resp_h, cid$resp_p] !in dcc_expected_transfers ) next; - - local s = dcc_expected_transfers[cid$resp_h, cid$resp_p]; - - s$dcc_mime_type = f$mime_type; - } - } - -function set_dcc_extraction_file(f: fa_file, filename: string) - { - if ( ! f?$conns ) return; - - for ( cid in f$conns ) - { - local c: connection = f$conns[cid]; - - if ( [cid$resp_h, cid$resp_p] !in dcc_expected_transfers ) next; - - local s = dcc_expected_transfers[cid$resp_h, cid$resp_p]; - - s$extraction_file = filename; - } - } - -function get_extraction_name(f: fa_file): string - { - local r = fmt("%s-%s.dat", extraction_prefix, f$id); - return r; - } - -# this handler sets the IRC::Info mime type -event file_new(f: fa_file) &priority=5 - { - if ( ! f?$source ) return; - if ( f$source != "IRC_DATA" ) return; - if ( ! f?$mime_type ) return; - - set_dcc_mime(f); - } - -# this handler check if file extraction is desired -event file_new(f: fa_file) &priority=5 - { - if ( ! f?$source ) return; - if ( f$source != "IRC_DATA" ) return; - - local fname: string; - - if ( f?$mime_type && extract_file_types in f$mime_type ) - { - fname = get_extraction_name(f); - FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT, - $extract_filename=fname]); - set_dcc_extraction_file(f, fname); - return; - } - - if ( ! f?$conns ) return; - - for ( cid in f$conns ) - { - local c: connection = f$conns[cid]; - - if ( [cid$resp_h, cid$resp_p] !in dcc_expected_transfers ) next; - - local s = dcc_expected_transfers[cid$resp_h, cid$resp_p]; - - if ( ! 
s$extract_file ) next; - - fname = get_extraction_name(f); - FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT, - $extract_filename=fname]); - s$extraction_file = fname; - return; - } - } +global dcc_expected_transfers: table[addr, port] of Info &synchronized &read_expire=5mins; function log_dcc(f: fa_file) { @@ -143,22 +46,17 @@ function log_dcc(f: fa_file) # Delete these values in case another DCC transfer # happens during the IRC session. - delete irc$extract_file; - delete irc$extraction_file; delete irc$dcc_file_name; delete irc$dcc_file_size; delete irc$dcc_mime_type; - return; } } event file_new(f: fa_file) &priority=-5 { - if ( ! f?$source ) return; - if ( f$source != "IRC_DATA" ) return; - - log_dcc(f); + if ( f?$source && f$source == "IRC_DATA" ) + log_dcc(f); } event irc_dcc_message(c: connection, is_orig: bool, diff --git a/scripts/base/protocols/irc/file-analysis.bro b/scripts/base/protocols/irc/file-analysis.bro index 5159064b27..f2e84fbc22 100644 --- a/scripts/base/protocols/irc/file-analysis.bro +++ b/scripts/base/protocols/irc/file-analysis.bro @@ -1,6 +1,6 @@ -@load ./dcc-send.bro +@load ./dcc-send @load base/utils/conn-ids -@load base/frameworks/file-analysis/main +@load base/frameworks/files module IRC; @@ -11,15 +11,13 @@ export { function get_file_handle(c: connection, is_orig: bool): string { - if ( is_orig ) return ""; - return cat(ANALYZER_IRC_DATA, " ", c$start_time, " ", id_string(c$id)); + if ( [c$id$resp_h, c$id$resp_p] !in dcc_expected_transfers ) + return ""; + + return cat(ANALYZER_IRC_DATA, c$start_time, c$id, is_orig); } -module GLOBAL; - -event get_file_handle(tag: AnalyzerTag, c: connection, is_orig: bool) - &priority=5 +event bro_init() &priority=5 { - if ( tag != ANALYZER_IRC_DATA ) return; - set_file_handle(IRC::get_file_handle(c, is_orig)); + Files::register_protocol(ANALYZER_IRC_DATA, IRC::get_file_handle); } diff --git a/scripts/base/protocols/smtp/__load__.bro b/scripts/base/protocols/smtp/__load__.bro index bac9cc118f..1e913d8dff 100644 --- a/scripts/base/protocols/smtp/__load__.bro +++ b/scripts/base/protocols/smtp/__load__.bro @@ -1,4 +1,4 @@ @load ./main @load ./entities -@load ./entities-excerpt +#@load ./entities-excerpt @load ./file-analysis diff --git a/scripts/base/protocols/smtp/entities.bro b/scripts/base/protocols/smtp/entities.bro index b58766e51d..dcb53dc0aa 100644 --- a/scripts/base/protocols/smtp/entities.bro +++ b/scripts/base/protocols/smtp/entities.bro @@ -1,5 +1,6 @@ ##! Analysis and logging for MIME entities found in SMTP sessions. +@load base/frameworks/files @load base/utils/strings @load base/utils/files @load ./main @@ -7,217 +8,56 @@ module SMTP; export { - redef enum Log::ID += { ENTITIES_LOG }; - - type EntityInfo: record { - ## This is the timestamp of when the MIME content transfer began. - ts: time &log; - uid: string &log; - id: conn_id &log; - ## A count to represent the depth of this message transaction in a - ## single connection where multiple messages were transferred. - trans_depth: count &log; - ## The filename seen in the Content-Disposition header. - filename: string &log &optional; - ## Track how many bytes of the MIME encoded file have been seen. - content_len: count &log &default=0; - ## The mime type of the entity discovered through magic bytes identification. - mime_type: string &log &optional; - - ## The calculated MD5 sum for the MIME entity. - md5: string &log &optional; - ## Optionally calculate the file's MD5 sum. 
Must be set prior to the - ## first data chunk being see in an event. - calc_md5: bool &default=F; - - ## Optionally write the file to disk. Must be set prior to first - ## data chunk being seen in an event. - extract_file: bool &default=F; - ## Store the file handle here for the file currently being extracted. - extraction_file: string &log &optional; + type Entity: record { + filename: string &optional; }; redef record Info += { - ## The in-progress entity information. - current_entity: EntityInfo &optional; + ## The current entity being seen. + entity: Entity &optional; }; redef record State += { - ## Track the number of MIME encoded files transferred during a session. - mime_level: count &default=0; + ## Track the number of MIME encoded files transferred + ## during a session. + mime_depth: count &default=0; }; - - ## Generate MD5 sums for these filetypes. - const generate_md5 = /application\/x-dosexec/ # Windows and DOS executables - | /application\/x-executable/ # *NIX executable binary - &redef; - - ## Pattern of file mime types to extract from MIME bodies. - const extract_file_types = /NO_DEFAULT/ &redef; - - ## The on-disk prefix for files to be extracted from MIME entity bodies. - const extraction_prefix = "smtp-entity" &redef; - - ## If set, never generate MD5s. This is mainly for testing purposes to create - ## reproducable output in the case that the decision whether to create - ## checksums depends on environment specifics. - const never_calc_md5 = F &redef; - - global log_mime: event(rec: EntityInfo); } -event bro_init() &priority=5 - { - Log::create_stream(SMTP::ENTITIES_LOG, [$columns=EntityInfo, $ev=log_mime]); - } - -function set_session(c: connection, new_entity: bool) - { - if ( ! c$smtp?$current_entity || new_entity ) - { - local info: EntityInfo; - info$ts=network_time(); - info$uid=c$uid; - info$id=c$id; - info$trans_depth=c$smtp$trans_depth; - - c$smtp$current_entity = info; - ++c$smtp_state$mime_level; - } - } - -function get_extraction_name(f: fa_file): string - { - local r = fmt("%s-%s.dat", extraction_prefix, f$id); - return r; - } - event mime_begin_entity(c: connection) &priority=10 { - if ( ! c?$smtp ) return; + #print fmt("%s : begin entity", c$uid); - set_session(c, T); + c$smtp$entity = Entity(); + ++c$smtp_state$mime_depth; } -event file_new(f: fa_file) &priority=5 +event file_over_new_connection(f: fa_file, c: connection) &priority=5 { - if ( ! f?$source ) return; - if ( f$source != "SMTP" ) return; - if ( ! f?$conns ) return; - - local fname: string; - local extracting: bool = F; - - for ( cid in f$conns ) - { - local c: connection = f$conns[cid]; - - if ( ! c?$smtp ) next; - if ( ! c$smtp?$current_entity ) next; - - if ( c$smtp$current_entity$extract_file ) - { - if ( ! extracting ) - { - fname = get_extraction_name(f); - FileAnalysis::add_analyzer(f, - [$tag=FileAnalysis::ANALYZER_EXTRACT, - $extract_filename=fname]); - extracting = T; - } - - c$smtp$current_entity$extraction_file = fname; - } - - if ( c$smtp$current_entity$calc_md5 ) - FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]); - } - } - -function check_extract_by_type(f: fa_file) - { - if ( extract_file_types !in f$mime_type ) return; - - if ( f?$info && FileAnalysis::ANALYZER_EXTRACT in f$info$analyzers ) + if ( f$source != "SMTP" ) return; - local fname: string = get_extraction_name(f); - FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT, - $extract_filename=fname]); - - if ( ! 
f?$conns ) return; - - for ( cid in f$conns ) - { - local c: connection = f$conns[cid]; - if ( ! c?$smtp ) next; - c$smtp$current_entity$extraction_file = fname; - } + if ( c$smtp$entity?$filename ) + f$info$filename = c$smtp$entity$filename; + f$info$depth = c$smtp_state$mime_depth; } -function check_md5_by_type(f: fa_file) +event mime_one_header(c: connection, h: mime_header_rec) &priority=5 { - if ( never_calc_md5 ) return; - if ( generate_md5 !in f$mime_type ) return; - - FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]); - } - -event file_new(f: fa_file) &priority=5 - { - if ( ! f?$source ) return; - if ( f$source != "SMTP" ) return; - if ( ! f?$mime_type ) return; - - if ( f?$conns ) - for ( cid in f$conns ) - { - local c: connection = f$conns[cid]; - - if ( ! c?$smtp ) next; - if ( ! c$smtp?$current_entity ) next; - - c$smtp$current_entity$mime_type = f$mime_type; - } - - check_extract_by_type(f); - check_md5_by_type(f); - } - -event file_state_remove(f: fa_file) &priority=4 - { - if ( ! f?$source ) return; - if ( f$source != "SMTP" ) return; - if ( ! f?$conns ) return; - - for ( cid in f$conns ) - { - local c: connection = f$conns[cid]; - - if ( ! c?$smtp ) next; - if ( ! c$smtp?$current_entity ) next; - # Only log if there was some content. - if ( f$seen_bytes == 0 ) next; - - if ( f?$info && f$info?$md5 ) - c$smtp$current_entity$md5 = f$info$md5; - - c$smtp$current_entity$content_len = f$seen_bytes; - Log::write(SMTP::ENTITIES_LOG, c$smtp$current_entity); - delete c$smtp$current_entity; + if ( ! c?$smtp ) return; - } - } -event mime_one_header(c: connection, h: mime_header_rec) - { - if ( ! c?$smtp ) return; - if ( h$name == "CONTENT-DISPOSITION" && /[fF][iI][lL][eE][nN][aA][mM][eE]/ in h$value ) - c$smtp$current_entity$filename = extract_filename_from_content_disposition(h$value); + c$smtp$entity$filename = extract_filename_from_content_disposition(h$value); if ( h$name == "CONTENT-TYPE" && /[nN][aA][mM][eE][:blank:]*=/ in h$value ) - c$smtp$current_entity$filename = extract_filename_from_content_disposition(h$value); + c$smtp$entity$filename = extract_filename_from_content_disposition(h$value); + } + +event mime_end_entity(c: connection) &priority=5 + { + if ( c?$smtp && c$smtp?$entity ) + delete c$smtp$entity; } diff --git a/scripts/base/protocols/smtp/file-analysis.bro b/scripts/base/protocols/smtp/file-analysis.bro index b893cbef7d..44938c8698 100644 --- a/scripts/base/protocols/smtp/file-analysis.bro +++ b/scripts/base/protocols/smtp/file-analysis.bro @@ -1,7 +1,7 @@ @load ./main @load ./entities @load base/utils/conn-ids -@load base/frameworks/file-analysis/main +@load base/frameworks/files module SMTP; @@ -12,16 +12,11 @@ export { function get_file_handle(c: connection, is_orig: bool): string { - if ( ! c?$smtp ) return ""; - return cat(ANALYZER_SMTP, " ", c$start_time, " ", c$smtp$trans_depth, " ", - c$smtp_state$mime_level); + return cat(ANALYZER_SMTP, c$start_time, c$smtp$trans_depth, + c$smtp_state$mime_depth); } -module GLOBAL; - -event get_file_handle(tag: AnalyzerTag, c: connection, is_orig: bool) - &priority=5 +event bro_init() &priority=5 { - if ( tag != ANALYZER_SMTP ) return; - set_file_handle(SMTP::get_file_handle(c, is_orig)); + Files::register_protocol(ANALYZER_SMTP, SMTP::get_file_handle); } diff --git a/scripts/policy/frameworks/files/detect-MHR.bro b/scripts/policy/frameworks/files/detect-MHR.bro new file mode 100644 index 0000000000..c896bd56fd --- /dev/null +++ b/scripts/policy/frameworks/files/detect-MHR.bro @@ -0,0 +1,63 @@ +##! 
Detect file downloads that have hash values matching files in Team +##! Cymru's Malware Hash Registry (http://www.team-cymru.org/Services/MHR/). + +@load base/frameworks/files +@load base/frameworks/notice +@load frameworks/files/hash-all-files + +module MalwareHashRegistery; + +export { + redef enum Notice::Type += { + ## The hash value of a file transferred over HTTP matched in the + ## malware hash registry. + Match + }; + + redef record Files::Info += { + ## Team Cymru Malware Hash Registry date of first detection. + mhr_first_detected: time &log &optional; + ## Team Cymru Malware Hash Registry percent of detection + ## among malware scanners. + mhr_detect_rate: count &log &optional; + }; + + ## File types to attempt matching against the Malware Hash Registry. + const match_file_types = /^application\/x-dosexec/ &redef; + + ## The malware hash registry runs each malware sample through several A/V engines. + ## Team Cymru returns a percentage to indicate how many A/V engines flagged the + ## sample as malicious. This threshold allows you to require a minimum detection + ## rate. + const notice_threshold = 10 &redef; +} + +event file_hash(f: fa_file, kind: string, hash: string) + { + if ( kind=="sha1" && match_file_types in f$mime_type ) + { + local hash_domain = fmt("%s.malware.hash.cymru.com", hash); + when ( local MHR_result = lookup_hostname_txt(hash_domain) ) + { + # Data is returned as " " + local MHR_answer = split1(MHR_result, / /); + if ( |MHR_answer| == 2 ) + { + f$info$mhr_first_detected = double_to_time(to_double(MHR_answer[1])); + f$info$mhr_detect_rate = to_count(MHR_answer[2]); + + #print strftime("%Y-%m-%d %H:%M:%S", f$info$mhr_first_detected); + if ( f$info$mhr_detect_rate >= notice_threshold ) + { + local url = ""; + # TODO: Create a generic mechanism for creating file "urls". + #if ( f$source == "HTTP" ) + # url = HTTP::build_url_http(f); + local message = fmt("%s %s", hash, url); + #local message = fmt("Host(s) %s sent a file with SHA1 hash %s to host %s", f$src_host, hash, f$dst_host); + NOTICE([$note=Match, $msg=message]); + } + } + } + } + } diff --git a/scripts/policy/frameworks/files/hash-all-files.bro b/scripts/policy/frameworks/files/hash-all-files.bro new file mode 100644 index 0000000000..931857c2bc --- /dev/null +++ b/scripts/policy/frameworks/files/hash-all-files.bro @@ -0,0 +1,7 @@ +# Perform MD5 and SHA1 hashing on all files. + +event file_new(f: fa_file) + { + Files::add_analyzer(f, Files::ANALYZER_MD5); + Files::add_analyzer(f, Files::ANALYZER_SHA1); + } diff --git a/scripts/policy/frameworks/intel/smtp-url-extraction.bro b/scripts/policy/frameworks/intel/smtp-url-extraction.bro index 2b87f809a6..b4ab32a915 100644 --- a/scripts/policy/frameworks/intel/smtp-url-extraction.bro +++ b/scripts/policy/frameworks/intel/smtp-url-extraction.bro @@ -26,6 +26,6 @@ event file_new(f: fa_file) &priority=5 if ( ! 
f?$source ) return; if ( f$source != "SMTP" ) return; - FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_DATA_EVENT, + Files::add_analyzer(f, [$tag=Files::ANALYZER_DATA_EVENT, $stream_event=intel_mime_data]); } diff --git a/scripts/base/protocols/smtp/entities-excerpt.bro b/scripts/policy/protocols/smtp/entities-excerpt.bro similarity index 100% rename from scripts/base/protocols/smtp/entities-excerpt.bro rename to scripts/policy/protocols/smtp/entities-excerpt.bro diff --git a/scripts/site/local.bro b/scripts/site/local.bro index dfebd9923a..e4b3a44e7a 100644 --- a/scripts/site/local.bro +++ b/scripts/site/local.bro @@ -64,7 +64,14 @@ # Detect logins using "interesting" hostnames. @load protocols/ssh/interesting-hostnames -# Detect MD5 sums in Team Cymru's Malware Hash Registry. -@load protocols/http/detect-MHR # Detect SQL injection attacks. @load protocols/http/detect-sqli + +#### Network File Handling #### + +# Enable MD5 and SHA1 hashing for all files. +@load frameworks/files/hash-all-files + +# Detect SHA1 sums in Team Cymru's Malware Hash Registry. +@load frameworks/files/detect-MHR + diff --git a/src/const.bif b/src/const.bif index 31e6ccee1a..10dceda6ff 100644 --- a/src/const.bif +++ b/src/const.bif @@ -24,4 +24,4 @@ const Tunnel::ip_tunnel_timeout: interval; const Threading::heartbeat_interval: interval; -const FileAnalysis::salt: string; +const Files::salt: string; diff --git a/src/event.bif b/src/event.bif index 5b14c05933..23ebc0591b 100644 --- a/src/event.bif +++ b/src/event.bif @@ -7001,7 +7001,7 @@ event event_queue_flush_point%(%); event get_file_handle%(tag: count, c: connection, is_orig: bool%); ## Indicates that an analysis of a new file has begun. The analysis can be -## augmented at this time via :bro:see:`FileAnalysis::add_analyzer`. +## augmented at this time via :bro:see:`Files::add_analyzer`. ## ## f: The file. ## @@ -7024,8 +7024,8 @@ event file_over_new_connection%(f: fa_file, c: connection%); ## f: The file. ## ## .. bro:see:: file_new file_over_new_connection file_gap file_state_remove -## default_file_timeout_interval FileAnalysis::set_timeout_interval -## FileAnalysis::set_timeout_interval +## default_file_timeout_interval Files::set_timeout_interval +## Files::set_timeout_interval event file_timeout%(f: fa_file%); ## Indicates that a chunk of the file is missing. @@ -7055,8 +7055,8 @@ event file_state_remove%(f: fa_file%); ## ## hash: The result of the hashing. ## -## .. bro:see:: FileAnalysis::add_analyzer FileAnalysis::ANALYZER_MD5 -## FileAnalysis::ANALYZER_SHA1 FileAnalysis::ANALYZER_SHA256 +## .. bro:see:: Files::add_analyzer Files::ANALYZER_MD5 +## Files::ANALYZER_SHA1 Files::ANALYZER_SHA256 event file_hash%(f: fa_file, kind: string, hash: string%); ## Deprecated. Will be removed. diff --git a/src/file_analysis.bif b/src/file_analysis.bif index ef46ccf9c1..648c031221 100644 --- a/src/file_analysis.bif +++ b/src/file_analysis.bif @@ -1,6 +1,6 @@ ##! Internal functions and types used by the logging framework. -module FileAnalysis; +module Files; %%{ #include "file_analysis/Manager.h" @@ -27,35 +27,35 @@ enum Analyzer %{ ANALYZER_DATA_EVENT, %} -## :bro:see:`FileAnalysis::set_timeout_interval`. -function FileAnalysis::__set_timeout_interval%(file_id: string, t: interval%): bool +## :bro:see:`Files::set_timeout_interval`. 
+function Files::__set_timeout_interval%(file_id: string, t: interval%): bool %{ bool result = file_mgr->SetTimeoutInterval(file_id->CheckString(), t); return new Val(result, TYPE_BOOL); %} -## :bro:see:`FileAnalysis::add_analyzer`. -function FileAnalysis::__add_analyzer%(file_id: string, args: any%): bool +## :bro:see:`Files::add_analyzer`. +function Files::__add_analyzer%(file_id: string, args: any%): bool %{ - using BifType::Record::FileAnalysis::AnalyzerArgs; + using BifType::Record::Files::AnalyzerArgs; RecordVal* rv = args->AsRecordVal()->CoerceTo(AnalyzerArgs); bool result = file_mgr->AddAnalyzer(file_id->CheckString(), rv); Unref(rv); return new Val(result, TYPE_BOOL); %} -## :bro:see:`FileAnalysis::remove_analyzer`. -function FileAnalysis::__remove_analyzer%(file_id: string, args: any%): bool +## :bro:see:`Files::remove_analyzer`. +function Files::__remove_analyzer%(file_id: string, args: any%): bool %{ - using BifType::Record::FileAnalysis::AnalyzerArgs; + using BifType::Record::Files::AnalyzerArgs; RecordVal* rv = args->AsRecordVal()->CoerceTo(AnalyzerArgs); bool result = file_mgr->RemoveAnalyzer(file_id->CheckString(), rv); Unref(rv); return new Val(result, TYPE_BOOL); %} -## :bro:see:`FileAnalysis::stop`. -function FileAnalysis::__stop%(file_id: string%): bool +## :bro:see:`Files::stop`. +function Files::__stop%(file_id: string%): bool %{ bool result = file_mgr->IgnoreFile(file_id->CheckString()); return new Val(result, TYPE_BOOL); diff --git a/src/file_analysis/Analyzer.h b/src/file_analysis/Analyzer.h index d32532b264..c348ab358b 100644 --- a/src/file_analysis/Analyzer.h +++ b/src/file_analysis/Analyzer.h @@ -8,7 +8,7 @@ namespace file_analysis { -typedef BifEnum::FileAnalysis::Analyzer FA_Tag; +typedef BifEnum::Files::Analyzer FA_Tag; class File; @@ -93,7 +93,7 @@ public: */ static FA_Tag ArgsTag(const RecordVal* args) { - using BifType::Record::FileAnalysis::AnalyzerArgs; + using BifType::Record::Files::AnalyzerArgs; return static_cast( args->Lookup(AnalyzerArgs->FieldOffset("tag"))->AsEnum()); } diff --git a/src/file_analysis/AnalyzerSet.cc b/src/file_analysis/AnalyzerSet.cc index 83c60d9abe..d10e78d338 100644 --- a/src/file_analysis/AnalyzerSet.cc +++ b/src/file_analysis/AnalyzerSet.cc @@ -26,7 +26,7 @@ static void analyzer_del_func(void* v) AnalyzerSet::AnalyzerSet(File* arg_file) : file(arg_file) { TypeList* t = new TypeList(); - t->Append(BifType::Record::FileAnalysis::AnalyzerArgs->Ref()); + t->Append(BifType::Record::Files::AnalyzerArgs->Ref()); analyzer_hash = new CompositeHash(t); Unref(t); analyzer_map.SetDeleteFunc(analyzer_del_func); diff --git a/src/file_analysis/DataEvent.cc b/src/file_analysis/DataEvent.cc index 159c8c19cd..1b04111c44 100644 --- a/src/file_analysis/DataEvent.cc +++ b/src/file_analysis/DataEvent.cc @@ -17,7 +17,7 @@ DataEvent::DataEvent(RecordVal* args, File* file, file_analysis::Analyzer* DataEvent::Instantiate(RecordVal* args, File* file) { - using BifType::Record::FileAnalysis::AnalyzerArgs; + using BifType::Record::Files::AnalyzerArgs; int chunk_off = AnalyzerArgs->FieldOffset("chunk_event"); int stream_off = AnalyzerArgs->FieldOffset("stream_event"); diff --git a/src/file_analysis/Extract.cc b/src/file_analysis/Extract.cc index cbe176d4ca..ef37425003 100644 --- a/src/file_analysis/Extract.cc +++ b/src/file_analysis/Extract.cc @@ -29,7 +29,7 @@ Extract::~Extract() file_analysis::Analyzer* Extract::Instantiate(RecordVal* args, File* file) { - using BifType::Record::FileAnalysis::AnalyzerArgs; + using BifType::Record::Files::AnalyzerArgs; 
Val* v = args->Lookup(AnalyzerArgs->FieldOffset("extract_filename")); if ( ! v ) diff --git a/src/file_analysis/Manager.cc b/src/file_analysis/Manager.cc index b247f23efc..61f9f7a10d 100644 --- a/src/file_analysis/Manager.cc +++ b/src/file_analysis/Manager.cc @@ -38,7 +38,7 @@ string Manager::HashHandle(const string& handle) const static string salt; if ( salt.empty() ) - salt = BifConst::FileAnalysis::salt->CheckString(); + salt = BifConst::Files::salt->CheckString(); char tmp[20]; uint64 hash[2]; @@ -310,7 +310,7 @@ void Manager::GetFileHandle(AnalyzerTag::Tag tag, Connection* c, bool is_orig) bool Manager::IsDisabled(AnalyzerTag::Tag tag) { if ( ! disabled ) - disabled = internal_const_val("FileAnalysis::disable")->AsTableVal(); + disabled = internal_const_val("Files::disable")->AsTableVal(); Val* index = new Val(tag, TYPE_COUNT); Val* yield = disabled->Lookup(index); diff --git a/testing/btest/scripts/base/frameworks/file-analysis/bifs/remove_action.bro b/testing/btest/scripts/base/frameworks/file-analysis/bifs/remove_action.bro index 1f15a4221f..e31abe5ea3 100644 --- a/testing/btest/scripts/base/frameworks/file-analysis/bifs/remove_action.bro +++ b/testing/btest/scripts/base/frameworks/file-analysis/bifs/remove_action.bro @@ -11,8 +11,8 @@ redef test_get_file_name = function(f: fa_file): string event file_new(f: fa_file) &priority=-10 { for ( tag in test_file_analyzers ) - FileAnalysis::remove_analyzer(f, tag); + Files::remove_analyzer(f, tag); local filename = test_get_file_name(f); - FileAnalysis::remove_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT, + Files::remove_analyzer(f, [$tag=Files::ANALYZER_EXTRACT, $extract_filename=filename]); } diff --git a/testing/btest/scripts/base/frameworks/file-analysis/bifs/set_timeout_interval.bro b/testing/btest/scripts/base/frameworks/file-analysis/bifs/set_timeout_interval.bro index 8ec4704cdb..c9eac4c31d 100644 --- a/testing/btest/scripts/base/frameworks/file-analysis/bifs/set_timeout_interval.bro +++ b/testing/btest/scripts/base/frameworks/file-analysis/bifs/set_timeout_interval.bro @@ -20,7 +20,7 @@ redef default_file_timeout_interval = 2sec; event file_timeout(f: fa_file) { if ( timeout_cnt < 1 ) - FileAnalysis::set_timeout_interval(f, f$timeout_interval); + Files::set_timeout_interval(f, f$timeout_interval); else terminate(); ++timeout_cnt; diff --git a/testing/btest/scripts/base/frameworks/file-analysis/bifs/stop.bro b/testing/btest/scripts/base/frameworks/file-analysis/bifs/stop.bro index e994706010..dd40c69684 100644 --- a/testing/btest/scripts/base/frameworks/file-analysis/bifs/stop.bro +++ b/testing/btest/scripts/base/frameworks/file-analysis/bifs/stop.bro @@ -4,5 +4,5 @@ event file_new(f: fa_file) { - FileAnalysis::stop(f); + Files::stop(f); } diff --git a/testing/scripts/file-analysis-test.bro b/testing/scripts/file-analysis-test.bro index 15929dd4f6..cb1027d8f1 100644 --- a/testing/scripts/file-analysis-test.bro +++ b/testing/scripts/file-analysis-test.bro @@ -1,7 +1,7 @@ global test_file_analysis_source: string = "" &redef; -global test_file_analyzers: set[FileAnalysis::AnalyzerArgs]; +global test_file_analyzers: set[Files::AnalyzerArgs]; global test_get_file_name: function(f: fa_file): string = function(f: fa_file): string { return ""; } &redef; @@ -30,13 +30,13 @@ event file_new(f: fa_file) f$source == test_file_analysis_source ) { for ( tag in test_file_analyzers ) - FileAnalysis::add_analyzer(f, tag); + Files::add_analyzer(f, tag); local filename: string = test_get_file_name(f); if ( filename != "" ) - 
FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT, + Files::add_analyzer(f, [$tag=Files::ANALYZER_EXTRACT, $extract_filename=filename]); - FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_DATA_EVENT, + Files::add_analyzer(f, [$tag=Files::ANALYZER_DATA_EVENT, $chunk_event=file_chunk, $stream_event=file_stream]); } @@ -94,7 +94,7 @@ event file_state_remove(f: fa_file) event bro_init() { - add test_file_analyzers[[$tag=FileAnalysis::ANALYZER_MD5]]; - add test_file_analyzers[[$tag=FileAnalysis::ANALYZER_SHA1]]; - add test_file_analyzers[[$tag=FileAnalysis::ANALYZER_SHA256]]; + add test_file_analyzers[[$tag=Files::ANALYZER_MD5]]; + add test_file_analyzers[[$tag=Files::ANALYZER_SHA1]]; + add test_file_analyzers[[$tag=Files::ANALYZER_SHA256]]; } From 2b48396d23f2dddb9dcef005fb478d9d12b99dad Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Fri, 5 Jul 2013 02:00:35 -0400 Subject: [PATCH 043/118] Check file_over_new_connetion to fire for each connection (including the first). --- src/file_analysis/File.cc | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/file_analysis/File.cc b/src/file_analysis/File.cc index e68ee5523c..c1ad92c0ed 100644 --- a/src/file_analysis/File.cc +++ b/src/file_analysis/File.cc @@ -116,11 +116,8 @@ void File::UpdateConnectionFields(Connection* conn) Val* conns = val->Lookup(conns_idx); - bool is_first = false; - if ( ! conns ) { - is_first = true; conns = empty_connection_table(); val->Assign(conns_idx, conns); } @@ -131,7 +128,7 @@ void File::UpdateConnectionFields(Connection* conn) Val* conn_val = conn->BuildConnVal(); conns->AsTableVal()->Assign(idx, conn_val); - if ( ! is_first && FileEventAvailable(file_over_new_connection) ) + if ( FileEventAvailable(file_over_new_connection) ) { val_list* vl = new val_list(); vl->append(val->Ref()); From cdf6b7864ecab07bf6a6150cbaa3eb58a12251c0 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Tue, 9 Jul 2013 11:50:54 -0400 Subject: [PATCH 044/118] More file analysis updates. - Recorrected the module name to Files. - Added Files::analyzer_name to get a more readable name for a file analyzer. - Improved and just overall better handled multipart mime transfers in HTTP and SMTP. HTTP now has orig_fuids and resp_fuids log fields since multiple "files" can be transferred with multipart mime in a single request/response pair. SMTP has an fuids field which has file unique IDs for all parts transferred. FTP and IRC have a log field named fuid added because only a single file can be transferred per irc and ftp log line. 
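As an illustration only (not part of this patch), a local script exercising the renamed API could look roughly like the sketch below; the handler body is hypothetical, while Files::add_analyzer, Files::ANALYZER_MD5, and Files::analyzer_name are the names this patch provides:

    event file_new(f: fa_file)
        {
        # The AnalyzerArgs record can be omitted since it now has a &default.
        Files::add_analyzer(f, Files::ANALYZER_MD5);

        # Files::analyzer_name() maps the enum tag to the readable string that
        # also ends up in the files.log "analyzers" field.
        print Files::analyzer_name(Files::ANALYZER_MD5);
        }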
--- scripts/base/frameworks/files/main.bro | 51 +++++++++----- scripts/base/protocols/ftp/__load__.bro | 2 +- scripts/base/protocols/ftp/file-analysis.bro | 23 ------ scripts/base/protocols/ftp/files.bro | 40 +++++++++++ scripts/base/protocols/http/__load__.bro | 6 +- scripts/base/protocols/http/entities.bro | 70 +++++++++++++++++++ .../http/{file-analysis.bro => files.bro} | 30 ++++---- scripts/base/protocols/http/main.bro | 26 +------ scripts/base/protocols/irc/__load__.bro | 4 +- scripts/base/protocols/irc/dcc-send.bro | 4 +- scripts/base/protocols/irc/files.bro | 41 +++++++++++ scripts/base/protocols/smtp/__load__.bro | 3 +- scripts/base/protocols/smtp/entities.bro | 3 +- scripts/base/protocols/smtp/file-analysis.bro | 22 ------ scripts/base/protocols/smtp/files.bro | 34 +++++++++ src/file_analysis/Manager.cc | 8 +-- src/file_analysis/analyzer/hash/events.bif | 4 +- src/file_analysis/file_analysis.bif | 6 ++ 18 files changed, 257 insertions(+), 120 deletions(-) delete mode 100644 scripts/base/protocols/ftp/file-analysis.bro create mode 100644 scripts/base/protocols/ftp/files.bro create mode 100644 scripts/base/protocols/http/entities.bro rename scripts/base/protocols/http/{file-analysis.bro => files.bro} (50%) create mode 100644 scripts/base/protocols/irc/files.bro delete mode 100644 scripts/base/protocols/smtp/file-analysis.bro create mode 100644 scripts/base/protocols/smtp/files.bro diff --git a/scripts/base/frameworks/files/main.bro b/scripts/base/frameworks/files/main.bro index d6e26e1833..d5a3ddee67 100644 --- a/scripts/base/frameworks/files/main.bro +++ b/scripts/base/frameworks/files/main.bro @@ -61,7 +61,7 @@ export { depth: count &default=0 &log; ## A set of analysis types done during the file analysis. - analyzers: set[Analyzer] &log; + analyzers: set[string] &log; ## A mime type provided by libmagic against the *bof_buffer*, or ## in the cases where no buffering of the beginning of file occurs, @@ -76,11 +76,16 @@ export { ## The duration the file was analyzed for. duration: interval &log &default=0secs; - ## If the source of this file is is a network connection, this field + ## If the source of this file is a network connection, this field ## indicates if the data originated from the local network or not as ## determined by the configured bro:see:`Site::local_nets`. local_orig: bool &log &optional; + ## If the source of this file is a network connection, this field + ## indicates if the file is being sent by the originator of the connection + ## or the responder. + is_orig: bool &log &optional; + ## Number of bytes provided to the file analysis engine for the file. seen_bytes: count &log &default=0; @@ -105,7 +110,7 @@ export { ## A table that can be used to disable file analysis completely for ## any files transferred over given network protocol analyzers. - const disable: table[Analyzer::Tag] of bool = table() &redef; + const disable: table[Files::Tag] of bool = table() &redef; ## The salt concatenated to unique file handle strings generated by ## :bro:see:`get_file_handle` before hashing them in to a file id @@ -139,7 +144,7 @@ export { ## for the *id* isn't currently active or the *args* ## were invalid for the analyzer type. global add_analyzer: function(f: fa_file, - tag: Files::Analyzer, + tag: Files::Tag, args: AnalyzerArgs &default=AnalyzerArgs()): bool; ## Removes an analyzer from the analysis of a given file. @@ -150,7 +155,7 @@ export { ## ## Returns: true if the analyzer will be removed, or false if analysis ## for the *id* isn't currently active. 
- global remove_analyzer: function(f: fa_file, tag: Files::Analyzer, args: AnalyzerArgs): bool; + global remove_analyzer: function(f: fa_file, tag: Files::Tag, args: AnalyzerArgs): bool; ## Stops/ignores any further analysis of a given file. ## @@ -161,6 +166,13 @@ export { ## isn't currently active. global stop: function(f: fa_file): bool; + ## Translates an file analyzer enum value to a string with the analyzer's name. + ## + ## tag: The analyzer tag. + ## + ## Returns: The analyzer name corresponding to the tag. + global analyzer_name: function(tag: Files::Tag): string; + ## Register callbacks for protocols that work with the Files framework. ## The callbacks must uniquely identify a file and each protocol can ## only have a single callback registered for it. @@ -171,7 +183,7 @@ export { ## defined previously. ## ## Returns: true if the protocol being registered was not previously registered. - global register_protocol: function(tag: AnalyzerTag, callback: function(c: connection, is_orig: bool): string): bool; + global register_protocol: function(tag: Files::Tag, callback: function(c: connection, is_orig: bool): string): bool; ## Register a callback for file analyzers to use if they need to do some manipulation ## when they are being added to a file before the core code takes over. This is @@ -181,7 +193,7 @@ export { ## tag: Tag for the file analyzer. ## ## callback: Function to execute when the given file analyzer is being added. - global register_analyzer_add_callback: function(tag: Files::Analyzer, callback: function(f: fa_file, args: AnalyzerArgs)); + global register_analyzer_add_callback: function(tag: Files::Tag, callback: function(f: fa_file, args: AnalyzerArgs)); ## Event that can be handled to access the Info record as it is sent on ## to the logging framework. @@ -194,14 +206,14 @@ redef record fa_file += { redef record AnalyzerArgs += { # This is used interally for the core file analyzer api. - tag: Files::Analyzer &optional; + tag: Files::Tag &optional; }; # Store the callbacks for protocol analyzers that have files. -global registered_protocols: table[AnalyzerTag] of function(c: connection, is_orig: bool): string = table() +global registered_protocols: table[Files::Tag] of function(c: connection, is_orig: bool): string = table() &default=function(c: connection, is_orig: bool): string { return cat(c$uid, is_orig); }; -global analyzer_add_callbacks: table[Files::Analyzer] of function(f: fa_file, args: AnalyzerArgs) = table(); +global analyzer_add_callbacks: table[Files::Tag] of function(f: fa_file, args: AnalyzerArgs) = table(); event bro_init() &priority=5 { @@ -227,6 +239,8 @@ function set_info(f: fa_file) f$info$total_bytes = f$total_bytes; f$info$missing_bytes = f$missing_bytes; f$info$overflow_bytes = f$overflow_bytes; + if ( f?$is_orig ) + f$info$is_orig = f$is_orig; if ( f?$mime_type ) f$info$mime_type = f$mime_type; } @@ -236,11 +250,11 @@ function set_timeout_interval(f: fa_file, t: interval): bool return __set_timeout_interval(f$id, t); } -function add_analyzer(f: fa_file, tag: Analyzer, args: AnalyzerArgs): bool +function add_analyzer(f: fa_file, tag: Files::Tag, args: AnalyzerArgs): bool { # This is to construct the correct args for the core API. 
args$tag = tag; - add f$info$analyzers[tag]; + add f$info$analyzers[Files::analyzer_name(tag)]; if ( tag in analyzer_add_callbacks ) analyzer_add_callbacks[tag](f, args); @@ -253,12 +267,12 @@ function add_analyzer(f: fa_file, tag: Analyzer, args: AnalyzerArgs): bool return T; } -function register_analyzer_add_callback(tag: Files::Analyzer, callback: function(f: fa_file, args: AnalyzerArgs)) +function register_analyzer_add_callback(tag: Files::Tag, callback: function(f: fa_file, args: AnalyzerArgs)) { analyzer_add_callbacks[tag] = callback; } -function remove_analyzer(f: fa_file, tag: Files::Analyzer, args: AnalyzerArgs): bool +function remove_analyzer(f: fa_file, tag: Files::Tag, args: AnalyzerArgs): bool { args$tag = tag; return __remove_analyzer(f$id, args); @@ -269,6 +283,11 @@ function stop(f: fa_file): bool return __stop(f$id); } +function analyzer_name(tag: Files::Tag): string + { + return __analyzer_name(tag); + } + event file_new(f: fa_file) &priority=10 { set_info(f); @@ -302,14 +321,14 @@ event file_state_remove(f: fa_file) &priority=-10 Log::write(Files::LOG, f$info); } -function register_protocol(tag: AnalyzerTag, callback: function(c: connection, is_orig: bool): string): bool +function register_protocol(tag: Files::Tag, callback: function(c: connection, is_orig: bool): string): bool { local result = (tag !in registered_protocols); registered_protocols[tag] = callback; return result; } -event get_file_handle(tag: AnalyzerTag, c: connection, is_orig: bool) &priority=5 +event get_file_handle(tag: Files::Tag, c: connection, is_orig: bool) &priority=5 { local handler = registered_protocols[tag]; set_file_handle(handler(c, is_orig)); diff --git a/scripts/base/protocols/ftp/__load__.bro b/scripts/base/protocols/ftp/__load__.bro index 9c839610ac..6fffd5ec43 100644 --- a/scripts/base/protocols/ftp/__load__.bro +++ b/scripts/base/protocols/ftp/__load__.bro @@ -1,4 +1,4 @@ @load ./utils-commands @load ./main -@load ./file-analysis +@load ./files @load ./gridftp diff --git a/scripts/base/protocols/ftp/file-analysis.bro b/scripts/base/protocols/ftp/file-analysis.bro deleted file mode 100644 index 3710a44cee..0000000000 --- a/scripts/base/protocols/ftp/file-analysis.bro +++ /dev/null @@ -1,23 +0,0 @@ -@load ./main -@load base/utils/conn-ids -@load base/frameworks/files - -module FTP; - -export { - ## Default file handle provider for FTP. - global get_file_handle: function(c: connection, is_orig: bool): string; -} - -function get_file_handle(c: connection, is_orig: bool): string - { - if ( [c$id$resp_h, c$id$resp_p] !in ftp_data_expected ) - return ""; - - return cat(ANALYZER_FTP_DATA, c$start_time, c$id, is_orig); - } - -event bro_init() &priority=5 - { - Files::register_protocol(ANALYZER_FTP_DATA, FTP::get_file_handle); - } diff --git a/scripts/base/protocols/ftp/files.bro b/scripts/base/protocols/ftp/files.bro new file mode 100644 index 0000000000..a943adff9d --- /dev/null +++ b/scripts/base/protocols/ftp/files.bro @@ -0,0 +1,40 @@ +@load ./main +@load base/utils/conn-ids +@load base/frameworks/files + +module FTP; + +export { + redef record Info += { + ## File unique ID. + fuid: string &optional &log; + }; + + ## Default file handle provider for FTP. 
+ global get_file_handle: function(c: connection, is_orig: bool): string; +} + +function get_file_handle(c: connection, is_orig: bool): string + { + if ( [c$id$resp_h, c$id$resp_p] !in ftp_data_expected ) + return ""; + + return cat(Analyzer::ANALYZER_FTP_DATA, c$start_time, c$id, is_orig); + } + +event bro_init() &priority=5 + { + Files::register_protocol(Analyzer::ANALYZER_FTP_DATA, FTP::get_file_handle); + } + + +event file_over_new_connection(f: fa_file, c: connection) &priority=5 + { + if ( [c$id$resp_h, c$id$resp_p] !in ftp_data_expected ) + return; + + local ftp = ftp_data_expected[c$id$resp_h, c$id$resp_p]; + ftp$fuid = f$id; + if ( f?$mime_type ) + ftp$mime_type = f$mime_type; + } \ No newline at end of file diff --git a/scripts/base/protocols/http/__load__.bro b/scripts/base/protocols/http/__load__.bro index 585b815eed..f0cec220d3 100644 --- a/scripts/base/protocols/http/__load__.bro +++ b/scripts/base/protocols/http/__load__.bro @@ -1,6 +1,4 @@ @load ./main +@load ./entities @load ./utils -@load ./file-analysis -#@load ./file-ident -#@load ./file-hash -#@load ./file-extract +@load ./files \ No newline at end of file diff --git a/scripts/base/protocols/http/entities.bro b/scripts/base/protocols/http/entities.bro new file mode 100644 index 0000000000..cc852a7e11 --- /dev/null +++ b/scripts/base/protocols/http/entities.bro @@ -0,0 +1,70 @@ +##! Analysis and logging for MIME entities found in HTTP sessions. + +@load base/frameworks/files +@load base/utils/strings +@load base/utils/files +@load ./main + +module HTTP; + +export { + type Entity: record { + ## Depth of the entity if multiple entities are sent in a single transaction. + depth: count &default=0; + + ## Filename for the entity if discovered from a header. + filename: string &optional; + }; + + redef record Info += { + ## The current entity being seen. + entity: Entity &optional; + + ## Current number of MIME entities in the HTTP request message body. + orig_mime_depth: count &default=0; + ## Current number of MIME entities in the HTTP response message body. + resp_mime_depth: count &default=0; + }; +} + +event http_begin_entity(c: connection, is_orig: bool) &priority=10 + { + set_state(c, F, is_orig); + + if ( is_orig ) + ++c$http$orig_mime_depth; + else + ++c$http$resp_mime_depth; + + c$http$entity = Entity($depth = is_orig ? 
c$http$orig_mime_depth : c$http$resp_mime_depth); + } + +event http_header(c: connection, is_orig: bool, name: string, value: string) &priority=3 + { + if ( name == "CONTENT-DISPOSITION" && + /[fF][iI][lL][eE][nN][aA][mM][eE]/ in value ) + { + c$http$entity$filename = extract_filename_from_content_disposition(value); + } + else if ( name == "CONTENT-TYPE" && + /[nN][aA][mM][eE][:blank:]*=/ in value ) + { + c$http$entity$filename = extract_filename_from_content_disposition(value); + } + } + +event file_over_new_connection(f: fa_file, c: connection) &priority=5 + { + if ( f$source == "HTTP" && c$http?$entity ) + { + f$info$depth = c$http$entity$depth; + if ( c$http$entity?$filename ) + f$info$filename = c$http$entity$filename; + } + } + +event http_end_entity(c: connection, is_orig: bool) &priority=5 + { + if ( c?$http && c$http?$entity ) + delete c$http$entity; + } diff --git a/scripts/base/protocols/http/file-analysis.bro b/scripts/base/protocols/http/files.bro similarity index 50% rename from scripts/base/protocols/http/file-analysis.bro rename to scripts/base/protocols/http/files.bro index b79ca041b8..44fdc4c1f4 100644 --- a/scripts/base/protocols/http/file-analysis.bro +++ b/scripts/base/protocols/http/files.bro @@ -1,17 +1,17 @@ @load ./main +@load ./entities @load ./utils -@load base/utils/conn-ids @load base/frameworks/files module HTTP; export { redef record Info += { - ## The sniffed mime type of the data being sent by the client. - client_mime_type: string &log &optional; + ## An ordered vector of file unique IDs seen sent by the originator (client). + orig_fuids: vector of string &log &default=string_vec(); - ## The sniffed mime type of the data being returned by the server. - mime_type: string &log &optional; + ## An ordered vector of file unique IDs seen sent by the responder (server). + resp_fuids: vector of string &log &default=string_vec(); }; ## Default file handle provider for HTTP. @@ -26,33 +26,27 @@ function get_file_handle(c: connection, is_orig: bool): string local mime_depth = is_orig ? c$http$orig_mime_depth : c$http$resp_mime_depth; if ( c$http$range_request ) { - return cat(ANALYZER_HTTP, is_orig, c$id$orig_h, mime_depth, build_url(c$http)); + return cat(Analyzer::ANALYZER_HTTP, is_orig, c$id$orig_h, mime_depth, build_url(c$http)); } else { - return cat(ANALYZER_HTTP, c$start_time, is_orig, + return cat(Analyzer::ANALYZER_HTTP, c$start_time, is_orig, c$http$trans_depth, mime_depth, id_string(c$id)); } } event bro_init() &priority=5 { - Files::register_protocol(ANALYZER_HTTP, HTTP::get_file_handle); + Files::register_protocol(Analyzer::ANALYZER_HTTP, HTTP::get_file_handle); } event file_over_new_connection(f: fa_file, c: connection) &priority=5 { if ( c?$http ) { - #if (!f?$mime_type) - # print f; -# - #if ( f$is_orig ) - # c$http$client_mime_type = f$mime_type; - #else - # c$http$mime_type = f$mime_type; - - if ( c$http?$filename ) - f$info$filename = c$http$filename; + if ( f$is_orig ) + c$http$orig_fuids[|c$http$orig_fuids|] = f$id; + else + c$http$resp_fuids[|c$http$resp_fuids|] = f$id; } } \ No newline at end of file diff --git a/scripts/base/protocols/http/main.bro b/scripts/base/protocols/http/main.bro index a982fdc9c6..d96384ee5f 100644 --- a/scripts/base/protocols/http/main.bro +++ b/scripts/base/protocols/http/main.bro @@ -75,10 +75,6 @@ export { ## Indicates if this request can assume 206 partial content in ## response. range_request: bool &default=F; - ## Number of MIME entities in the HTTP request message body so far. 
- orig_mime_depth: count &default=0; - ## Number of MIME entities in the HTTP response message body so far. - resp_mime_depth: count &default=0; }; ## Structure to maintain state for an HTTP connection with multiple @@ -104,8 +100,8 @@ export { } &redef; ## A list of HTTP methods. Other methods will generate a weird. Note - ## that the HTTP analyzer will only accept methods consisting solely - ## of letters ``[A-Za-z]``. + ## that the HTTP analyzer will only accept methods consisting solely + ## of letters ``[A-Za-z]``. const http_methods: set[string] = { "GET", "POST", "HEAD", "OPTIONS", "PUT", "DELETE", "TRACE", "CONNECT", @@ -275,25 +271,9 @@ event http_header(c: connection, is_orig: bool, name: string, value: string) &pr } } } - - else # server headers - { - if ( name == "CONTENT-DISPOSITION" && - /[fF][iI][lL][eE][nN][aA][mM][eE]/ in value ) - c$http$filename = extract_filename_from_content_disposition(value); - } + } -event http_begin_entity(c: connection, is_orig: bool) &priority=5 - { - set_state(c, F, is_orig); - - if ( is_orig ) - ++c$http$orig_mime_depth; - else - ++c$http$resp_mime_depth; - } - event http_message_done(c: connection, is_orig: bool, stat: http_message_stat) &priority = 5 { set_state(c, F, is_orig); diff --git a/scripts/base/protocols/irc/__load__.bro b/scripts/base/protocols/irc/__load__.bro index d20550c54f..afb7fecc62 100644 --- a/scripts/base/protocols/irc/__load__.bro +++ b/scripts/base/protocols/irc/__load__.bro @@ -1,3 +1,3 @@ @load ./main -#@load ./dcc-send -@load ./file-analysis +@load ./dcc-send +@load ./files \ No newline at end of file diff --git a/scripts/base/protocols/irc/dcc-send.bro b/scripts/base/protocols/irc/dcc-send.bro index b79eb370e6..83b32faf2b 100644 --- a/scripts/base/protocols/irc/dcc-send.bro +++ b/scripts/base/protocols/irc/dcc-send.bro @@ -49,13 +49,15 @@ function log_dcc(f: fa_file) delete irc$dcc_file_name; delete irc$dcc_file_size; delete irc$dcc_mime_type; + + delete dcc_expected_transfers[cid$resp_h, cid$resp_p]; return; } } event file_new(f: fa_file) &priority=-5 { - if ( f?$source && f$source == "IRC_DATA" ) + if ( f$source == "IRC_DATA" ) log_dcc(f); } diff --git a/scripts/base/protocols/irc/files.bro b/scripts/base/protocols/irc/files.bro new file mode 100644 index 0000000000..f4553b534a --- /dev/null +++ b/scripts/base/protocols/irc/files.bro @@ -0,0 +1,41 @@ +@load ./dcc-send +@load base/utils/conn-ids +@load base/frameworks/files + +module IRC; + +export { + redef record Info += { + ## File unique ID. + fuid: string &log &optional; + }; + + ## Default file handle provider for IRC. 
+ global get_file_handle: function(c: connection, is_orig: bool): string; +} + +function get_file_handle(c: connection, is_orig: bool): string + { + if ( [c$id$resp_h, c$id$resp_p] !in dcc_expected_transfers ) + return ""; + + return cat(Analyzer::ANALYZER_IRC_DATA, c$start_time, c$id, is_orig); + } + +event bro_init() &priority=5 + { + Files::register_protocol(Analyzer::ANALYZER_IRC_DATA, IRC::get_file_handle); + } + +event file_over_new_connection(f: fa_file, c: connection) &priority=5 + { + if ( [c$id$resp_h, c$id$resp_p] !in dcc_expected_transfers ) + return; + + local irc = dcc_expected_transfers[c$id$resp_h, c$id$resp_p]; + irc$fuid = f$id; + if ( irc?$dcc_file_name ) + f$info$filename = irc$dcc_file_name; + if ( f?$mime_type ) + irc$dcc_mime_type = f$mime_type; + } \ No newline at end of file diff --git a/scripts/base/protocols/smtp/__load__.bro b/scripts/base/protocols/smtp/__load__.bro index 1e913d8dff..a37c2ed3de 100644 --- a/scripts/base/protocols/smtp/__load__.bro +++ b/scripts/base/protocols/smtp/__load__.bro @@ -1,4 +1,3 @@ @load ./main @load ./entities -#@load ./entities-excerpt -@load ./file-analysis +@load ./files \ No newline at end of file diff --git a/scripts/base/protocols/smtp/entities.bro b/scripts/base/protocols/smtp/entities.bro index dcb53dc0aa..067b8acf8e 100644 --- a/scripts/base/protocols/smtp/entities.bro +++ b/scripts/base/protocols/smtp/entities.bro @@ -9,6 +9,7 @@ module SMTP; export { type Entity: record { + ## Filename for the entity if discovered from a header. filename: string &optional; }; @@ -26,8 +27,6 @@ export { event mime_begin_entity(c: connection) &priority=10 { - #print fmt("%s : begin entity", c$uid); - c$smtp$entity = Entity(); ++c$smtp_state$mime_depth; } diff --git a/scripts/base/protocols/smtp/file-analysis.bro b/scripts/base/protocols/smtp/file-analysis.bro deleted file mode 100644 index 44938c8698..0000000000 --- a/scripts/base/protocols/smtp/file-analysis.bro +++ /dev/null @@ -1,22 +0,0 @@ -@load ./main -@load ./entities -@load base/utils/conn-ids -@load base/frameworks/files - -module SMTP; - -export { - ## Default file handle provider for SMTP. - global get_file_handle: function(c: connection, is_orig: bool): string; -} - -function get_file_handle(c: connection, is_orig: bool): string - { - return cat(ANALYZER_SMTP, c$start_time, c$smtp$trans_depth, - c$smtp_state$mime_depth); - } - -event bro_init() &priority=5 - { - Files::register_protocol(ANALYZER_SMTP, SMTP::get_file_handle); - } diff --git a/scripts/base/protocols/smtp/files.bro b/scripts/base/protocols/smtp/files.bro new file mode 100644 index 0000000000..e67181d6bc --- /dev/null +++ b/scripts/base/protocols/smtp/files.bro @@ -0,0 +1,34 @@ +@load ./main +@load ./entities +@load base/utils/conn-ids +@load base/frameworks/files + +module SMTP; + +export { + redef record Info += { + ## An ordered vector of file unique IDs seen attached to + ## the message. + fuids: vector of string &log &default=string_vec(); + }; + + ## Default file handle provider for SMTP. 
+ global get_file_handle: function(c: connection, is_orig: bool): string; +} + +function get_file_handle(c: connection, is_orig: bool): string + { + return cat(Analyzer::ANALYZER_SMTP, c$start_time, c$smtp$trans_depth, + c$smtp_state$mime_depth); + } + +event bro_init() &priority=5 + { + Files::register_protocol(Analyzer::ANALYZER_SMTP, SMTP::get_file_handle); + } + +event file_over_new_connection(f: fa_file, c: connection) &priority=5 + { + if ( c?$smtp ) + c$smtp$fuids[|c$smtp$fuids|] = f$id; + } \ No newline at end of file diff --git a/src/file_analysis/Manager.cc b/src/file_analysis/Manager.cc index 02af4aa9f1..453c6f7902 100644 --- a/src/file_analysis/Manager.cc +++ b/src/file_analysis/Manager.cc @@ -19,8 +19,8 @@ string Manager::salt; Manager::Manager() { - tag_enum_type = new EnumType("FileAnalysis::Tag"); - ::ID* id = install_ID("Tag", "FileAnalysis", true, true); + tag_enum_type = new EnumType("Files::Tag"); + ::ID* id = install_ID("Tag", "Files", true, true); add_type(id, tag_enum_type, 0, 0); } @@ -42,7 +42,7 @@ void Manager::RegisterAnalyzerComponent(Component* component) { const char* cname = component->CanonicalName(); - if ( tag_enum_type->Lookup("FileAnalysis", cname) != -1 ) + if ( tag_enum_type->Lookup("Files", cname) != -1 ) reporter->FatalError("File Analyzer %s defined more than once", cname); DBG_LOG(DBG_FILE_ANALYSIS, "Registering analyzer %s (tag %s)", @@ -54,7 +54,7 @@ void Manager::RegisterAnalyzerComponent(Component* component) component->Tag().AsEnumVal()->InternalInt(), component)); string id = fmt("ANALYZER_%s", cname); - tag_enum_type->AddName("FileAnalysis", id.c_str(), + tag_enum_type->AddName("Files", id.c_str(), component->Tag().AsEnumVal()->InternalInt(), true); } diff --git a/src/file_analysis/analyzer/hash/events.bif b/src/file_analysis/analyzer/hash/events.bif index b4a8de1c74..e03cbf359a 100644 --- a/src/file_analysis/analyzer/hash/events.bif +++ b/src/file_analysis/analyzer/hash/events.bif @@ -7,6 +7,6 @@ ## ## hash: The result of the hashing. ## -## .. bro:see:: FileAnalysis::add_analyzer FileAnalysis::ANALYZER_MD5 -## FileAnalysis::ANALYZER_SHA1 FileAnalysis::ANALYZER_SHA256 +## .. bro:see:: Files::add_analyzer Files::ANALYZER_MD5 +## Files::ANALYZER_SHA1 Files::ANALYZER_SHA256 event file_hash%(f: fa_file, kind: string, hash: string%); diff --git a/src/file_analysis/file_analysis.bif b/src/file_analysis/file_analysis.bif index 148e6360da..b6c80ac800 100644 --- a/src/file_analysis/file_analysis.bif +++ b/src/file_analysis/file_analysis.bif @@ -42,6 +42,12 @@ function Files::__stop%(file_id: string%): bool return new Val(result, TYPE_BOOL); %} +## :bro:see:`Files::analyzer_name`. +function Files::__analyzer_name%(tag: Files::Tag%) : string + %{ + return new StringVal(file_mgr->GetAnalyzerName(tag->InternalInt())); + %} + module GLOBAL; ## For use within a :bro:see:`get_file_handle` handler to set a unique From ecfac31de0b5d69254b590939c3a56be4038e0d6 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Tue, 9 Jul 2013 11:51:23 -0400 Subject: [PATCH 045/118] Fixed SMTP URL extraction for the Intel framework with Files updates. 
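For context, a minimal sketch (not part of the patch) of the add_analyzer calling convention this fix adopts: the analyzer tag is passed separately from the AnalyzerArgs record, and the registered stream event receives the reassembled file data. The my_stream_data handler is a made-up name for illustration:

    event my_stream_data(f: fa_file, data: string)
        {
        print fmt("%s delivered %d bytes", f$id, |data|);
        }

    event file_new(f: fa_file)
        {
        if ( f$source == "SMTP" )
            Files::add_analyzer(f, Files::ANALYZER_DATA_EVENT,
                                [$stream_event=my_stream_data]);
        }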
--- .../frameworks/intel/smtp-url-extraction.bro | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/scripts/policy/frameworks/intel/smtp-url-extraction.bro b/scripts/policy/frameworks/intel/smtp-url-extraction.bro index b4ab32a915..2478eba9f8 100644 --- a/scripts/policy/frameworks/intel/smtp-url-extraction.bro +++ b/scripts/policy/frameworks/intel/smtp-url-extraction.bro @@ -1,11 +1,12 @@ @load base/frameworks/intel -@load base/protocols/smtp/file-analysis +@load base/protocols/smtp @load base/utils/urls @load ./where-locations event intel_mime_data(f: fa_file, data: string) { - if ( ! f?$conns ) return; + if ( ! f?$conns ) + return; for ( cid in f$conns ) { @@ -21,11 +22,8 @@ event intel_mime_data(f: fa_file, data: string) } } -event file_new(f: fa_file) &priority=5 +event file_new(f: fa_file) { - if ( ! f?$source ) return; - if ( f$source != "SMTP" ) return; - - Files::add_analyzer(f, [$tag=Files::ANALYZER_DATA_EVENT, - $stream_event=intel_mime_data]); + if ( f$source == "SMTP" ) + Files::add_analyzer(f, Files::ANALYZER_DATA_EVENT, [$stream_event=intel_mime_data]); } From 5dbc354898454bb3e8b0970119925b42bec213f7 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Tue, 9 Jul 2013 14:05:36 -0400 Subject: [PATCH 046/118] extract_filename_from_content_disposition is still hacky but more closely aligns with RFC5987 --- scripts/base/utils/files.bro | 14 ++++++++++---- .../btest/Baseline/scripts.base.utils.files/output | 3 +++ testing/btest/scripts/base/utils/files.test | 7 +++++++ 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/scripts/base/utils/files.bro b/scripts/base/utils/files.bro index 76d2ecea4f..fedd93ab47 100644 --- a/scripts/base/utils/files.bro +++ b/scripts/base/utils/files.bro @@ -19,9 +19,15 @@ function generate_extraction_filename(prefix: string, c: connection, suffix: str ## the filename. function extract_filename_from_content_disposition(data: string): string { - local filename = sub(data, /^.*[nN][aA][mM][eE][[:blank:]]*=[[:blank:]]*/, ""); + local filename = sub(data, /^.*[nN][aA][mM][eE][[:blank:]]*\*?=[[:blank:]]*/, ""); + # Remove quotes around the filename if they are there. if ( /^\"/ in filename ) - filename = split_n(filename, /\"/, F, 2)[2]; - return filename; - } + filename = split_n(filename, /\"/, F, 2)[2]; + + # Remove the language and encoding if it's there. 
+ if ( /^[a-zA-Z0-9\!#$%&+-^_`{}~]+'[a-zA-Z0-9\!#$%&+-^_`{}~]*'/ in filename ) + filename = sub(filename, /^.+'.*'/, ""); + + return unescape_URI(filename); + } \ No newline at end of file diff --git a/testing/btest/Baseline/scripts.base.utils.files/output b/testing/btest/Baseline/scripts.base.utils.files/output index ab92c3a624..4d53bcedd3 100644 --- a/testing/btest/Baseline/scripts.base.utils.files/output +++ b/testing/btest/Baseline/scripts.base.utils.files/output @@ -1,3 +1,6 @@ +Economy +US-$ rates +\xa3 rates test-prefix_141.142.220.118:48649-208.80.152.118:80_test-suffix test-prefix_141.142.220.118:48649-208.80.152.118:80 141.142.220.118:48649-208.80.152.118:80_test-suffix diff --git a/testing/btest/scripts/base/utils/files.test b/testing/btest/scripts/base/utils/files.test index 84eff13187..3324522030 100644 --- a/testing/btest/scripts/base/utils/files.test +++ b/testing/btest/scripts/base/utils/files.test @@ -11,3 +11,10 @@ event connection_established(c: connection) print generate_extraction_filename("", c, "test-suffix"); print generate_extraction_filename("", c, ""); } + +event bro_init() + { + print extract_filename_from_content_disposition("attachment; filename=Economy"); + print extract_filename_from_content_disposition("attachment; name=\"US-$ rates\""); + print extract_filename_from_content_disposition("attachment; filename*=iso-8859-1'en'%A3%20rates"); + } \ No newline at end of file From 6a5b8250589e7e9d9b2036fa4fe2230561e5428f Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Tue, 9 Jul 2013 14:25:41 -0500 Subject: [PATCH 047/118] Delay file_over_new_connection events until after file_new occurs. --- src/file_analysis/File.cc | 23 +++++++++++++++++++++-- src/file_analysis/File.h | 4 ++++ 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/src/file_analysis/File.cc b/src/file_analysis/File.cc index b5edfaedc9..ed3d2ae9a8 100644 --- a/src/file_analysis/File.cc +++ b/src/file_analysis/File.cc @@ -75,7 +75,8 @@ void File::StaticInit() File::File(const string& file_id, Connection* conn, analyzer::Tag tag, bool is_orig) : id(file_id), val(0), postpone_timeout(false), first_chunk(true), - missed_bof(false), need_reassembly(false), done(false), analyzers(this) + missed_bof(false), need_reassembly(false), done(false), + did_file_new_event(false), analyzers(this) { StaticInit(); @@ -99,6 +100,7 @@ File::~File() { DBG_LOG(DBG_FILE_ANALYSIS, "Destroying File object %s", id.c_str()); Unref(val); + assert(fonc_queue.empty()); } void File::UpdateLastActivityTime() @@ -135,7 +137,12 @@ void File::UpdateConnectionFields(Connection* conn) val_list* vl = new val_list(); vl->append(val->Ref()); vl->append(conn_val->Ref()); - FileEvent(file_over_new_connection, vl); + + if ( did_file_new_event ) + FileEvent(file_over_new_connection, vl); + else + fonc_queue.push(pair( + file_over_new_connection, vl)); } } @@ -432,6 +439,18 @@ void File::FileEvent(EventHandlerPtr h, val_list* vl) { mgr.QueueEvent(h, vl); + if ( h == file_new ) + { + did_file_new_event = true; + + while ( ! fonc_queue.empty() ) + { + pair p = fonc_queue.front(); + mgr.QueueEvent(p.first, p.second); + fonc_queue.pop(); + } + } + if ( h == file_new || h == file_timeout ) { // immediate feedback is required for these events. 
diff --git a/src/file_analysis/File.h b/src/file_analysis/File.h index ac54c75bc5..5d967e7356 100644 --- a/src/file_analysis/File.h +++ b/src/file_analysis/File.h @@ -3,7 +3,9 @@ #ifndef FILE_ANALYSIS_FILE_H #define FILE_ANALYSIS_FILE_H +#include #include +#include #include #include "Conn.h" @@ -239,7 +241,9 @@ private: bool missed_bof; /**< Flags that we missed start of file. */ bool need_reassembly; /**< Whether file stream reassembly is needed. */ bool done; /**< If this object is about to be deleted. */ + bool did_file_new_event; /**< Whether the file_new event has been done. */ AnalyzerSet analyzers; /**< A set of attached file analyzer. */ + queue > fonc_queue; struct BOF_Buffer { BOF_Buffer() : full(false), replayed(false), size(0) {} From da4a0bed03dd9b4904716844a271c7074fcc17ee Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Tue, 9 Jul 2013 15:55:33 -0500 Subject: [PATCH 048/118] Disable more libmagic builtin checks that override the magic database. --- src/util.cc | 2 +- src/util.h | 17 +++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/src/util.cc b/src/util.cc index cff36f0f23..5a63be22cb 100644 --- a/src/util.cc +++ b/src/util.cc @@ -1578,7 +1578,7 @@ void bro_init_magic(magic_t* cookie_ptr, int flags) if ( ! cookie_ptr || *cookie_ptr ) return; - *cookie_ptr = magic_open(flags|MAGIC_NO_CHECK_TOKENS); + *cookie_ptr = magic_open(flags|DISABLE_LIBMAGIC_BUILTIN_CHECKS); // Use our custom database for mime types, but the default database // from libmagic for the verbose file type. diff --git a/src/util.h b/src/util.h index cafa63b7e8..91ed8f2888 100644 --- a/src/util.h +++ b/src/util.h @@ -377,6 +377,23 @@ struct CompareString } }; +// Older versions of libmagic may not define the MAGIC_NO_CHECK_BUILTIN +// convenience macro and other newer versions seem to have a typo that makes +// it unusable, so just make a different one now with all known flags for +// builtin libmagic components that should be disabled so that Bro only +// uses the custom magic database shipped with it. +#define DISABLE_LIBMAGIC_BUILTIN_CHECKS ( \ + MAGIC_NO_CHECK_COMPRESS | \ + MAGIC_NO_CHECK_TAR | \ +/* MAGIC_NO_CHECK_SOFT | */ \ + MAGIC_NO_CHECK_APPTYPE | \ + MAGIC_NO_CHECK_ELF | \ + MAGIC_NO_CHECK_TEXT | \ + MAGIC_NO_CHECK_CDF | \ + MAGIC_NO_CHECK_TOKENS | \ + MAGIC_NO_CHECK_ENCODING \ +) + extern magic_t magic_desc_cookie; extern magic_t magic_mime_cookie; From efe878f3de6999c7b3f28fde79af7e4b43fd1180 Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Tue, 9 Jul 2013 15:56:47 -0500 Subject: [PATCH 049/118] Make magic for emitting application/msword mime type less strict. 
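Illustrative only, not part of the patch: with the relaxed entry below, any OLE2 compound document matching the generic Office magic is reported as application/msword, so a script keyed on that MIME type would see those files too, e.g.:

    event file_new(f: fa_file)
        {
        if ( f?$mime_type && f$mime_type == "application/msword" )
            # Hypothetical reaction; a site might instead extract such files.
            Files::add_analyzer(f, Files::ANALYZER_MD5);
        }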
--- magic/msdos | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/magic/msdos b/magic/msdos index 59a9d2caac..cc411aeeb7 100644 --- a/magic/msdos +++ b/magic/msdos @@ -349,12 +349,13 @@ # False positive with PPT (also currently this string is too long) #0 string/b \xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x3E\x00\x03\x00\xFE\xFF\x09\x00\x06 Microsoft Installer 0 string/b \320\317\021\340\241\261\032\341 Microsoft Office Document +!:mime application/msword #>48 byte 0x1B Excel Document #!:mime application/vnd.ms-excel ->546 string bjbj Microsoft Word Document -!:mime application/msword ->546 string jbjb Microsoft Word Document -!:mime application/msword +#>546 string bjbj Microsoft Word Document +#!:mime application/msword +#>546 string jbjb Microsoft Word Document +#!:mime application/msword 0 string/b \224\246\056 Microsoft Word Document !:mime application/msword From 73155c321bdd82a762b9642b1bcf55f45e784e94 Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Tue, 9 Jul 2013 15:58:28 -0500 Subject: [PATCH 050/118] Add an is_orig parameter to file_over_new_connection event. --- scripts/base/frameworks/files/main.bro | 2 +- scripts/base/protocols/ftp/files.bro | 4 ++-- scripts/base/protocols/http/entities.bro | 2 +- scripts/base/protocols/http/files.bro | 4 ++-- scripts/base/protocols/irc/files.bro | 4 ++-- scripts/base/protocols/smtp/entities.bro | 2 +- scripts/base/protocols/smtp/files.bro | 4 ++-- src/event.bif | 4 +++- src/file_analysis/File.cc | 5 +++-- src/file_analysis/File.h | 3 ++- src/file_analysis/Manager.cc | 2 +- testing/scripts/file-analysis-test.bro | 2 +- 12 files changed, 21 insertions(+), 17 deletions(-) diff --git a/scripts/base/frameworks/files/main.bro b/scripts/base/frameworks/files/main.bro index d5a3ddee67..8dd07fcb53 100644 --- a/scripts/base/frameworks/files/main.bro +++ b/scripts/base/frameworks/files/main.bro @@ -293,7 +293,7 @@ event file_new(f: fa_file) &priority=10 set_info(f); } -event file_over_new_connection(f: fa_file, c: connection) &priority=10 +event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priority=10 { set_info(f); add f$info$conn_uids[c$uid]; diff --git a/scripts/base/protocols/ftp/files.bro b/scripts/base/protocols/ftp/files.bro index a943adff9d..c68717c8a2 100644 --- a/scripts/base/protocols/ftp/files.bro +++ b/scripts/base/protocols/ftp/files.bro @@ -28,7 +28,7 @@ event bro_init() &priority=5 } -event file_over_new_connection(f: fa_file, c: connection) &priority=5 +event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priority=5 { if ( [c$id$resp_h, c$id$resp_p] !in ftp_data_expected ) return; @@ -37,4 +37,4 @@ event file_over_new_connection(f: fa_file, c: connection) &priority=5 ftp$fuid = f$id; if ( f?$mime_type ) ftp$mime_type = f$mime_type; - } \ No newline at end of file + } diff --git a/scripts/base/protocols/http/entities.bro b/scripts/base/protocols/http/entities.bro index cc852a7e11..fc8ab753ae 100644 --- a/scripts/base/protocols/http/entities.bro +++ b/scripts/base/protocols/http/entities.bro @@ -53,7 +53,7 @@ event http_header(c: connection, is_orig: bool, name: string, value: string) &pr } } -event file_over_new_connection(f: fa_file, c: connection) &priority=5 +event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priority=5 { if ( f$source == "HTTP" && c$http?$entity ) { diff --git a/scripts/base/protocols/http/files.bro b/scripts/base/protocols/http/files.bro index 44fdc4c1f4..e45ff8cadb 
100644 --- a/scripts/base/protocols/http/files.bro +++ b/scripts/base/protocols/http/files.bro @@ -40,7 +40,7 @@ event bro_init() &priority=5 Files::register_protocol(Analyzer::ANALYZER_HTTP, HTTP::get_file_handle); } -event file_over_new_connection(f: fa_file, c: connection) &priority=5 +event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priority=5 { if ( c?$http ) { @@ -49,4 +49,4 @@ event file_over_new_connection(f: fa_file, c: connection) &priority=5 else c$http$resp_fuids[|c$http$resp_fuids|] = f$id; } - } \ No newline at end of file + } diff --git a/scripts/base/protocols/irc/files.bro b/scripts/base/protocols/irc/files.bro index f4553b534a..8708270bfd 100644 --- a/scripts/base/protocols/irc/files.bro +++ b/scripts/base/protocols/irc/files.bro @@ -27,7 +27,7 @@ event bro_init() &priority=5 Files::register_protocol(Analyzer::ANALYZER_IRC_DATA, IRC::get_file_handle); } -event file_over_new_connection(f: fa_file, c: connection) &priority=5 +event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priority=5 { if ( [c$id$resp_h, c$id$resp_p] !in dcc_expected_transfers ) return; @@ -38,4 +38,4 @@ event file_over_new_connection(f: fa_file, c: connection) &priority=5 f$info$filename = irc$dcc_file_name; if ( f?$mime_type ) irc$dcc_mime_type = f$mime_type; - } \ No newline at end of file + } diff --git a/scripts/base/protocols/smtp/entities.bro b/scripts/base/protocols/smtp/entities.bro index 067b8acf8e..ec43b39ce1 100644 --- a/scripts/base/protocols/smtp/entities.bro +++ b/scripts/base/protocols/smtp/entities.bro @@ -31,7 +31,7 @@ event mime_begin_entity(c: connection) &priority=10 ++c$smtp_state$mime_depth; } -event file_over_new_connection(f: fa_file, c: connection) &priority=5 +event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priority=5 { if ( f$source != "SMTP" ) return; diff --git a/scripts/base/protocols/smtp/files.bro b/scripts/base/protocols/smtp/files.bro index e67181d6bc..1cf9ec01e1 100644 --- a/scripts/base/protocols/smtp/files.bro +++ b/scripts/base/protocols/smtp/files.bro @@ -27,8 +27,8 @@ event bro_init() &priority=5 Files::register_protocol(Analyzer::ANALYZER_SMTP, SMTP::get_file_handle); } -event file_over_new_connection(f: fa_file, c: connection) &priority=5 +event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priority=5 { if ( c?$smtp ) c$smtp$fuids[|c$smtp$fuids|] = f$id; - } \ No newline at end of file + } diff --git a/src/event.bif b/src/event.bif index df22902094..e4d6f8c844 100644 --- a/src/event.bif +++ b/src/event.bif @@ -911,8 +911,10 @@ event file_new%(f: fa_file%); ## ## c: The new connection over which the file is seen being transferred. ## +## is_orig: true if the originator of *c* is the one sending the file. +## ## .. bro:see:: file_new file_timeout file_gap file_state_remove -event file_over_new_connection%(f: fa_file, c: connection%); +event file_over_new_connection%(f: fa_file, c: connection, is_orig: bool%); ## Indicates that file analysis has timed out because no activity was seen ## for the file in a while. 
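A minimal handler for the extended file_over_new_connection signature might look like the following sketch (the print output is illustrative only):

    event file_over_new_connection(f: fa_file, c: connection, is_orig: bool)
    	{
    	if ( is_orig )
    		print fmt("%s: file %s pushed by the connection originator", c$uid, f$id);
    	else
    		print fmt("%s: file %s sent by the responder", c$uid, f$id);
    	}
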
diff --git a/src/file_analysis/File.cc b/src/file_analysis/File.cc index ed3d2ae9a8..9a06fa3db9 100644 --- a/src/file_analysis/File.cc +++ b/src/file_analysis/File.cc @@ -90,7 +90,7 @@ File::File(const string& file_id, Connection* conn, analyzer::Tag tag, // add source, connection, is_orig fields SetSource(analyzer_mgr->GetAnalyzerName(tag)); val->Assign(is_orig_idx, new Val(is_orig, TYPE_BOOL)); - UpdateConnectionFields(conn); + UpdateConnectionFields(conn, is_orig); } UpdateLastActivityTime(); @@ -113,7 +113,7 @@ double File::GetLastActivityTime() const return val->Lookup(last_active_idx)->AsTime(); } -void File::UpdateConnectionFields(Connection* conn) +void File::UpdateConnectionFields(Connection* conn, bool is_orig) { if ( ! conn ) return; @@ -137,6 +137,7 @@ void File::UpdateConnectionFields(Connection* conn) val_list* vl = new val_list(); vl->append(val->Ref()); vl->append(conn_val->Ref()); + vl->append(new Val(is_orig, TYPE_BOOL)); if ( did_file_new_event ) FileEvent(file_over_new_connection, vl); diff --git a/src/file_analysis/File.h b/src/file_analysis/File.h index 5d967e7356..794734d24b 100644 --- a/src/file_analysis/File.h +++ b/src/file_analysis/File.h @@ -173,8 +173,9 @@ protected: * Updates the "conn_ids" and "conn_uids" fields in #val record with the * \c conn_id and UID taken from \a conn. * @param conn the connection over which a part of the file has been seen. + * @param is_orig true if the connection originator is sending the file. */ - void UpdateConnectionFields(Connection* conn); + void UpdateConnectionFields(Connection* conn, bool is_orig); /** * Increment a byte count field of #val record by \a size. diff --git a/src/file_analysis/Manager.cc b/src/file_analysis/Manager.cc index 453c6f7902..4e25bb0b0e 100644 --- a/src/file_analysis/Manager.cc +++ b/src/file_analysis/Manager.cc @@ -250,7 +250,7 @@ File* Manager::GetFile(const string& file_id, Connection* conn, rval->UpdateLastActivityTime(); if ( update_conn ) - rval->UpdateConnectionFields(conn); + rval->UpdateConnectionFields(conn, is_orig); } return rval; diff --git a/testing/scripts/file-analysis-test.bro b/testing/scripts/file-analysis-test.bro index 9df640c893..cf2bbf2d59 100644 --- a/testing/scripts/file-analysis-test.bro +++ b/testing/scripts/file-analysis-test.bro @@ -66,7 +66,7 @@ event file_new(f: fa_file) } } -event file_over_new_connection(f: fa_file, c: connection) +event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) { print "FILE_OVER_NEW_CONNECTION"; } From 39444b5af79de557b5ead73a9c2156bec1e2ea46 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Tue, 9 Jul 2013 22:44:55 -0400 Subject: [PATCH 051/118] Moved DPD signatures into script specific directories. - This caused us to lose signatures for POP3 and Bittorrent. These will need discovered in the repository again when we add scripts for those analyzers. 
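The layout introduced below pairs each protocol directory with its own dpd.sig, pulled in from that protocol's __load__.bro via @load-sigs. When the lost POP3 signatures are restored they would presumably follow the same pattern; a sketch reusing the signature bodies from the deleted dpd.sig (the pop3 directory and file names are hypothetical, since no POP3 scripts exist yet):

    # scripts/base/protocols/pop3/__load__.bro  (hypothetical)
    @load ./main
    @load-sigs ./dpd.sig

    # scripts/base/protocols/pop3/dpd.sig  (hypothetical; bodies copied from the
    # deleted scripts/base/frameworks/dpd/dpd.sig)
    signature dpd_pop3_client {
        ip-proto == tcp
        payload /(|.*[\r\n])[[:space:]]*([uU][sS][eE][rR][[:space:]]|[aA][pP][oO][pP][[:space:]]|[cC][aA][pP][aA]|[aA][uU][tT][hH])/
        tcp-state originator
    }

    signature dpd_pop3_server {
        ip-proto == tcp
        payload /^\+OK/
        requires-reverse-signature dpd_pop3_client
        enable "pop3"
        tcp-state responder
    }
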
--- scripts/base/frameworks/dpd/dpd.sig | 212 -------------------- scripts/base/frameworks/dpd/main.bro | 2 - scripts/base/init-default.bro | 1 + scripts/base/protocols/ftp/__load__.bro | 2 + scripts/base/protocols/ftp/dpd.sig | 15 ++ scripts/base/protocols/http/__load__.bro | 2 + scripts/base/protocols/http/dpd.sig | 13 ++ scripts/base/protocols/irc/__load__.bro | 2 + scripts/base/protocols/irc/dpd.sig | 33 +++ scripts/base/protocols/smtp/__load__.bro | 2 + scripts/base/protocols/smtp/dpd.sig | 12 ++ scripts/base/protocols/socks/__load__.bro | 4 +- scripts/base/protocols/socks/dpd.sig | 48 +++++ scripts/base/protocols/ssh/__load__.bro | 4 +- scripts/base/protocols/ssh/dpd.sig | 13 ++ scripts/base/protocols/ssl/__load__.bro | 2 + scripts/base/protocols/ssl/dpd.sig | 15 ++ scripts/base/protocols/tunnels/__load__.bro | 1 + scripts/base/protocols/tunnels/dpd.sig | 14 ++ 19 files changed, 181 insertions(+), 216 deletions(-) delete mode 100644 scripts/base/frameworks/dpd/dpd.sig create mode 100644 scripts/base/protocols/ftp/dpd.sig create mode 100644 scripts/base/protocols/http/dpd.sig create mode 100644 scripts/base/protocols/irc/dpd.sig create mode 100644 scripts/base/protocols/smtp/dpd.sig create mode 100644 scripts/base/protocols/socks/dpd.sig create mode 100644 scripts/base/protocols/ssh/dpd.sig create mode 100644 scripts/base/protocols/ssl/dpd.sig create mode 100644 scripts/base/protocols/tunnels/__load__.bro create mode 100644 scripts/base/protocols/tunnels/dpd.sig diff --git a/scripts/base/frameworks/dpd/dpd.sig b/scripts/base/frameworks/dpd/dpd.sig deleted file mode 100644 index 49e24cefc6..0000000000 --- a/scripts/base/frameworks/dpd/dpd.sig +++ /dev/null @@ -1,212 +0,0 @@ -# Signatures to initiate dynamic protocol detection. - -signature dpd_ftp_client { - ip-proto == tcp - payload /(|.*[\n\r]) *[uU][sS][eE][rR] / - tcp-state originator -} - -# Match for server greeting (220, 120) and for login or passwd -# required (230, 331). 
-signature dpd_ftp_server { - ip-proto == tcp - payload /[\n\r ]*(120|220)[^0-9].*[\n\r] *(230|331)[^0-9]/ - tcp-state responder - requires-reverse-signature dpd_ftp_client - enable "ftp" -} - -signature dpd_http_client { - ip-proto == tcp - payload /^[[:space:]]*(GET|HEAD|POST)[[:space:]]*/ - tcp-state originator -} - -signature dpd_http_server { - ip-proto == tcp - payload /^HTTP\/[0-9]/ - tcp-state responder - requires-reverse-signature dpd_http_client - enable "http" -} - -signature dpd_bittorrenttracker_client { - ip-proto == tcp - payload /^.*\/announce\?.*info_hash/ - tcp-state originator -} - -signature dpd_bittorrenttracker_server { - ip-proto == tcp - payload /^HTTP\/[0-9]/ - tcp-state responder - requires-reverse-signature dpd_bittorrenttracker_client - enable "bittorrenttracker" -} - -signature dpd_bittorrent_peer1 { - ip-proto == tcp - payload /^\x13BitTorrent protocol/ - tcp-state originator -} - -signature dpd_bittorrent_peer2 { - ip-proto == tcp - payload /^\x13BitTorrent protocol/ - tcp-state responder - requires-reverse-signature dpd_bittorrent_peer1 - enable "bittorrent" -} - -signature irc_client1 { - ip-proto == tcp - payload /(|.*[\r\n]) *[Uu][Ss][Ee][Rr] +.+[\n\r]+ *[Nn][Ii][Cc][Kk] +.*[\r\n]/ - requires-reverse-signature irc_server_reply - tcp-state originator - enable "irc" -} - -signature irc_client2 { - ip-proto == tcp - payload /(|.*[\r\n]) *[Nn][Ii][Cc][Kk] +.+[\r\n]+ *[Uu][Ss][Ee][Rr] +.+[\r\n]/ - requires-reverse-signature irc_server_reply - tcp-state originator - enable "irc" -} - -signature irc_server_reply { - ip-proto == tcp - payload /^(|.*[\n\r])(:[^ \n\r]+ )?[0-9][0-9][0-9] / - tcp-state responder -} - -signature irc_server_to_server1 { - ip-proto == tcp - payload /(|.*[\r\n]) *[Ss][Ee][Rr][Vv][Ee][Rr] +[^ ]+ +[0-9]+ +:.+[\r\n]/ -} - -signature irc_server_to_server2 { - ip-proto == tcp - payload /(|.*[\r\n]) *[Ss][Ee][Rr][Vv][Ee][Rr] +[^ ]+ +[0-9]+ +:.+[\r\n]/ - requires-reverse-signature irc_server_to_server1 - enable "irc" -} - -signature dpd_smtp_client { - ip-proto == tcp - payload /(|.*[\n\r])[[:space:]]*([hH][eE][lL][oO]|[eE][hH][lL][oO])/ - requires-reverse-signature dpd_smtp_server - enable "smtp" - tcp-state originator -} - -signature dpd_smtp_server { - ip-proto == tcp - payload /^[[:space:]]*220[[:space:]-]/ - tcp-state responder -} - -signature dpd_ssh_client { - ip-proto == tcp - payload /^[sS][sS][hH]-/ - requires-reverse-signature dpd_ssh_server - enable "ssh" - tcp-state originator -} - -signature dpd_ssh_server { - ip-proto == tcp - payload /^[sS][sS][hH]-/ - tcp-state responder -} - -signature dpd_pop3_server { - ip-proto == tcp - payload /^\+OK/ - requires-reverse-signature dpd_pop3_client - enable "pop3" - tcp-state responder -} - -signature dpd_pop3_client { - ip-proto == tcp - payload /(|.*[\r\n])[[:space:]]*([uU][sS][eE][rR][[:space:]]|[aA][pP][oO][pP][[:space:]]|[cC][aA][pP][aA]|[aA][uU][tT][hH])/ - tcp-state originator -} - -signature dpd_ssl_server { - ip-proto == tcp - # Server hello. - payload /^(\x16\x03[\x00\x01\x02]..\x02...\x03[\x00\x01\x02]|...?\x04..\x00\x02).*/ - requires-reverse-signature dpd_ssl_client - enable "ssl" - tcp-state responder -} - -signature dpd_ssl_client { - ip-proto == tcp - # Client hello. 
- payload /^(\x16\x03[\x00\x01\x02]..\x01...\x03[\x00\x01\x02]|...?\x01[\x00\x01\x02][\x02\x03]).*/ - tcp-state originator -} - -signature dpd_ayiya { - ip-proto = udp - payload /^..\x11\x29/ - enable "ayiya" -} - -signature dpd_teredo { - ip-proto = udp - payload /^(\x00\x00)|(\x00\x01)|([\x60-\x6f])/ - enable "teredo" -} - -signature dpd_socks4_client { - ip-proto == tcp - # '32' is a rather arbitrary max length for the user name. - payload /^\x04[\x01\x02].{0,32}\x00/ - tcp-state originator -} - -signature dpd_socks4_server { - ip-proto == tcp - requires-reverse-signature dpd_socks4_client - payload /^\x00[\x5a\x5b\x5c\x5d]/ - tcp-state responder - enable "socks" -} - -signature dpd_socks4_reverse_client { - ip-proto == tcp - # '32' is a rather arbitrary max length for the user name. - payload /^\x04[\x01\x02].{0,32}\x00/ - tcp-state responder -} - -signature dpd_socks4_reverse_server { - ip-proto == tcp - requires-reverse-signature dpd_socks4_reverse_client - payload /^\x00[\x5a\x5b\x5c\x5d]/ - tcp-state originator - enable "socks" -} - -signature dpd_socks5_client { - ip-proto == tcp - # Watch for a few authentication methods to reduce false positives. - payload /^\x05.[\x00\x01\x02]/ - tcp-state originator -} - -signature dpd_socks5_server { - ip-proto == tcp - requires-reverse-signature dpd_socks5_client - # Watch for a single authentication method to be chosen by the server or - # the server to indicate the no authentication is required. - payload /^\x05(\x00|\x01[\x00\x01\x02])/ - tcp-state responder - enable "socks" -} - - diff --git a/scripts/base/frameworks/dpd/main.bro b/scripts/base/frameworks/dpd/main.bro index c3282a1da4..9df8a45e5e 100644 --- a/scripts/base/frameworks/dpd/main.bro +++ b/scripts/base/frameworks/dpd/main.bro @@ -3,8 +3,6 @@ module DPD; -@load-sigs ./dpd.sig - export { ## Add the DPD logging stream identifier. redef enum Log::ID += { LOG }; diff --git a/scripts/base/init-default.bro b/scripts/base/init-default.bro index 9c3995673c..6c40a7547f 100644 --- a/scripts/base/init-default.bro +++ b/scripts/base/init-default.bro @@ -46,5 +46,6 @@ @load base/protocols/ssh @load base/protocols/ssl @load base/protocols/syslog +@load base/protocols/tunnels @load base/misc/find-checksum-offloading diff --git a/scripts/base/protocols/ftp/__load__.bro b/scripts/base/protocols/ftp/__load__.bro index 464571dc7d..f3226de69d 100644 --- a/scripts/base/protocols/ftp/__load__.bro +++ b/scripts/base/protocols/ftp/__load__.bro @@ -3,3 +3,5 @@ @load ./file-analysis @load ./file-extract @load ./gridftp + +@load-sigs ./dpd.sig \ No newline at end of file diff --git a/scripts/base/protocols/ftp/dpd.sig b/scripts/base/protocols/ftp/dpd.sig new file mode 100644 index 0000000000..3a6ceadd18 --- /dev/null +++ b/scripts/base/protocols/ftp/dpd.sig @@ -0,0 +1,15 @@ +signature dpd_ftp_client { + ip-proto == tcp + payload /(|.*[\n\r]) *[uU][sS][eE][rR] / + tcp-state originator +} + +# Match for server greeting (220, 120) and for login or passwd +# required (230, 331). 
+signature dpd_ftp_server { + ip-proto == tcp + payload /[\n\r ]*(120|220)[^0-9].*[\n\r] *(230|331)[^0-9]/ + tcp-state responder + requires-reverse-signature dpd_ftp_client + enable "ftp" +} diff --git a/scripts/base/protocols/http/__load__.bro b/scripts/base/protocols/http/__load__.bro index 58618dedc7..8f426c1521 100644 --- a/scripts/base/protocols/http/__load__.bro +++ b/scripts/base/protocols/http/__load__.bro @@ -4,3 +4,5 @@ @load ./file-ident @load ./file-hash @load ./file-extract + +@load-sigs ./dpd.sig \ No newline at end of file diff --git a/scripts/base/protocols/http/dpd.sig b/scripts/base/protocols/http/dpd.sig new file mode 100644 index 0000000000..13470f4e95 --- /dev/null +++ b/scripts/base/protocols/http/dpd.sig @@ -0,0 +1,13 @@ +signature dpd_http_client { + ip-proto == tcp + payload /^[[:space:]]*(GET|HEAD|POST)[[:space:]]*/ + tcp-state originator +} + +signature dpd_http_server { + ip-proto == tcp + payload /^HTTP\/[0-9]/ + tcp-state responder + requires-reverse-signature dpd_http_client + enable "http" +} diff --git a/scripts/base/protocols/irc/__load__.bro b/scripts/base/protocols/irc/__load__.bro index 5123385b0c..2e60cda0a6 100644 --- a/scripts/base/protocols/irc/__load__.bro +++ b/scripts/base/protocols/irc/__load__.bro @@ -1,3 +1,5 @@ @load ./main @load ./dcc-send @load ./file-analysis + +@load-sigs ./dpd.sig \ No newline at end of file diff --git a/scripts/base/protocols/irc/dpd.sig b/scripts/base/protocols/irc/dpd.sig new file mode 100644 index 0000000000..308358d619 --- /dev/null +++ b/scripts/base/protocols/irc/dpd.sig @@ -0,0 +1,33 @@ +signature irc_client1 { + ip-proto == tcp + payload /(|.*[\r\n]) *[Uu][Ss][Ee][Rr] +.+[\n\r]+ *[Nn][Ii][Cc][Kk] +.*[\r\n]/ + requires-reverse-signature irc_server_reply + tcp-state originator + enable "irc" +} + +signature irc_client2 { + ip-proto == tcp + payload /(|.*[\r\n]) *[Nn][Ii][Cc][Kk] +.+[\r\n]+ *[Uu][Ss][Ee][Rr] +.+[\r\n]/ + requires-reverse-signature irc_server_reply + tcp-state originator + enable "irc" +} + +signature irc_server_reply { + ip-proto == tcp + payload /^(|.*[\n\r])(:[^ \n\r]+ )?[0-9][0-9][0-9] / + tcp-state responder +} + +signature irc_server_to_server1 { + ip-proto == tcp + payload /(|.*[\r\n]) *[Ss][Ee][Rr][Vv][Ee][Rr] +[^ ]+ +[0-9]+ +:.+[\r\n]/ +} + +signature irc_server_to_server2 { + ip-proto == tcp + payload /(|.*[\r\n]) *[Ss][Ee][Rr][Vv][Ee][Rr] +[^ ]+ +[0-9]+ +:.+[\r\n]/ + requires-reverse-signature irc_server_to_server1 + enable "irc" +} diff --git a/scripts/base/protocols/smtp/__load__.bro b/scripts/base/protocols/smtp/__load__.bro index bac9cc118f..3e3fde6947 100644 --- a/scripts/base/protocols/smtp/__load__.bro +++ b/scripts/base/protocols/smtp/__load__.bro @@ -2,3 +2,5 @@ @load ./entities @load ./entities-excerpt @load ./file-analysis + +@load-sigs ./dpd.sig \ No newline at end of file diff --git a/scripts/base/protocols/smtp/dpd.sig b/scripts/base/protocols/smtp/dpd.sig new file mode 100644 index 0000000000..49ed7ea3be --- /dev/null +++ b/scripts/base/protocols/smtp/dpd.sig @@ -0,0 +1,12 @@ +signature dpd_smtp_client { + ip-proto == tcp + payload /(|.*[\n\r])[[:space:]]*([hH][eE][lL][oO]|[eE][hH][lL][oO])/ + requires-reverse-signature dpd_smtp_server + enable "smtp" + tcp-state originator +} + +signature dpd_smtp_server { + ip-proto == tcp + payload /^[[:space:]]*220[[:space:]-]/ + tcp-state responder diff --git a/scripts/base/protocols/socks/__load__.bro b/scripts/base/protocols/socks/__load__.bro index 0098b81a7a..80193afb6f 100644 --- a/scripts/base/protocols/socks/__load__.bro +++ 
b/scripts/base/protocols/socks/__load__.bro @@ -1,2 +1,4 @@ @load ./consts -@load ./main \ No newline at end of file +@load ./main + +@load-sigs ./dpd.sig \ No newline at end of file diff --git a/scripts/base/protocols/socks/dpd.sig b/scripts/base/protocols/socks/dpd.sig new file mode 100644 index 0000000000..3dcd7a945a --- /dev/null +++ b/scripts/base/protocols/socks/dpd.sig @@ -0,0 +1,48 @@ +signature dpd_socks4_client { + ip-proto == tcp + # '32' is a rather arbitrary max length for the user name. + payload /^\x04[\x01\x02].{0,32}\x00/ + tcp-state originator +} + +signature dpd_socks4_server { + ip-proto == tcp + requires-reverse-signature dpd_socks4_client + payload /^\x00[\x5a\x5b\x5c\x5d]/ + tcp-state responder + enable "socks" +} + +signature dpd_socks4_reverse_client { + ip-proto == tcp + # '32' is a rather arbitrary max length for the user name. + payload /^\x04[\x01\x02].{0,32}\x00/ + tcp-state responder +} + +signature dpd_socks4_reverse_server { + ip-proto == tcp + requires-reverse-signature dpd_socks4_reverse_client + payload /^\x00[\x5a\x5b\x5c\x5d]/ + tcp-state originator + enable "socks" +} + +signature dpd_socks5_client { + ip-proto == tcp + # Watch for a few authentication methods to reduce false positives. + payload /^\x05.[\x00\x01\x02]/ + tcp-state originator +} + +signature dpd_socks5_server { + ip-proto == tcp + requires-reverse-signature dpd_socks5_client + # Watch for a single authentication method to be chosen by the server or + # the server to indicate the no authentication is required. + payload /^\x05(\x00|\x01[\x00\x01\x02])/ + tcp-state responder + enable "socks" +} + + diff --git a/scripts/base/protocols/ssh/__load__.bro b/scripts/base/protocols/ssh/__load__.bro index d551be57d3..0f3cb011f8 100644 --- a/scripts/base/protocols/ssh/__load__.bro +++ b/scripts/base/protocols/ssh/__load__.bro @@ -1 +1,3 @@ -@load ./main \ No newline at end of file +@load ./main + +@load-sigs ./dpd.sig \ No newline at end of file diff --git a/scripts/base/protocols/ssh/dpd.sig b/scripts/base/protocols/ssh/dpd.sig new file mode 100644 index 0000000000..95e22908ab --- /dev/null +++ b/scripts/base/protocols/ssh/dpd.sig @@ -0,0 +1,13 @@ +signature dpd_ssh_client { + ip-proto == tcp + payload /^[sS][sS][hH]-/ + requires-reverse-signature dpd_ssh_server + enable "ssh" + tcp-state originator +} + +signature dpd_ssh_server { + ip-proto == tcp + payload /^[sS][sS][hH]-/ + tcp-state responder +} diff --git a/scripts/base/protocols/ssl/__load__.bro b/scripts/base/protocols/ssl/__load__.bro index 239438047c..80cb4e216a 100644 --- a/scripts/base/protocols/ssl/__load__.bro +++ b/scripts/base/protocols/ssl/__load__.bro @@ -1,3 +1,5 @@ @load ./consts @load ./main @load ./mozilla-ca-list + +@load-sigs ./dpd.sig \ No newline at end of file diff --git a/scripts/base/protocols/ssl/dpd.sig b/scripts/base/protocols/ssl/dpd.sig new file mode 100644 index 0000000000..b36b9a5aa5 --- /dev/null +++ b/scripts/base/protocols/ssl/dpd.sig @@ -0,0 +1,15 @@ +signature dpd_ssl_server { + ip-proto == tcp + # Server hello. + payload /^(\x16\x03[\x00\x01\x02]..\x02...\x03[\x00\x01\x02]|...?\x04..\x00\x02).*/ + requires-reverse-signature dpd_ssl_client + enable "ssl" + tcp-state responder +} + +signature dpd_ssl_client { + ip-proto == tcp + # Client hello. 
+ payload /^(\x16\x03[\x00\x01\x02]..\x01...\x03[\x00\x01\x02]|...?\x01[\x00\x01\x02][\x02\x03]).*/ + tcp-state originator +} diff --git a/scripts/base/protocols/tunnels/__load__.bro b/scripts/base/protocols/tunnels/__load__.bro new file mode 100644 index 0000000000..9de7b6ff19 --- /dev/null +++ b/scripts/base/protocols/tunnels/__load__.bro @@ -0,0 +1 @@ +@load-sigs ./dpd.sig \ No newline at end of file diff --git a/scripts/base/protocols/tunnels/dpd.sig b/scripts/base/protocols/tunnels/dpd.sig new file mode 100644 index 0000000000..0c66775f5d --- /dev/null +++ b/scripts/base/protocols/tunnels/dpd.sig @@ -0,0 +1,14 @@ +# Provide DPD signatures for tunneling protocols that otherwise +# wouldn't be detected at all. + +signature dpd_ayiya { + ip-proto = udp + payload /^..\x11\x29/ + enable "ayiya" +} + +signature dpd_teredo { + ip-proto = udp + payload /^(\x00\x00)|(\x00\x01)|([\x60-\x6f])/ + enable "teredo" +} From 4dda9cd3bab0ca2eb2123a57ea4685eef7c560e1 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Tue, 9 Jul 2013 22:45:21 -0400 Subject: [PATCH 052/118] Fix a bug where the same analyzer tag was reused for two different analyzers. --- src/analyzer/protocol/bittorrent/BitTorrentTracker.cc | 2 +- src/analyzer/protocol/bittorrent/Plugin.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/analyzer/protocol/bittorrent/BitTorrentTracker.cc b/src/analyzer/protocol/bittorrent/BitTorrentTracker.cc index b32db9a8bd..98adcaa610 100644 --- a/src/analyzer/protocol/bittorrent/BitTorrentTracker.cc +++ b/src/analyzer/protocol/bittorrent/BitTorrentTracker.cc @@ -22,7 +22,7 @@ static RecordType* bittorrent_benc_value; static TableType* bittorrent_benc_dir; BitTorrentTracker_Analyzer::BitTorrentTracker_Analyzer(Connection* c) -: tcp::TCP_ApplicationAnalyzer("BITTORRENT", c) +: tcp::TCP_ApplicationAnalyzer("BITTORRENTTRACKER", c) { if ( ! bt_tracker_headers ) { diff --git a/src/analyzer/protocol/bittorrent/Plugin.cc b/src/analyzer/protocol/bittorrent/Plugin.cc index 2da9972d0d..7fea68bf07 100644 --- a/src/analyzer/protocol/bittorrent/Plugin.cc +++ b/src/analyzer/protocol/bittorrent/Plugin.cc @@ -7,6 +7,6 @@ BRO_PLUGIN_BEGIN(Bro, BitTorrent) BRO_PLUGIN_DESCRIPTION("BitTorrent Analyzer"); BRO_PLUGIN_ANALYZER("BitTorrent", bittorrent::BitTorrent_Analyzer); - BRO_PLUGIN_ANALYZER("BitTorrentTracker", bittorrent::BitTorrent_Analyzer); + BRO_PLUGIN_ANALYZER("BitTorrentTracker", bittorrent::BitTorrentTracker_Analyzer); BRO_PLUGIN_BIF_FILE(events); BRO_PLUGIN_END From 60da0f476416e4a7a831a20df9f06b8f0db1a782 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Tue, 9 Jul 2013 22:57:36 -0400 Subject: [PATCH 053/118] Added a missing curly brace in smtp/dpd.sig --- scripts/base/protocols/smtp/dpd.sig | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/base/protocols/smtp/dpd.sig b/scripts/base/protocols/smtp/dpd.sig index 49ed7ea3be..6fbde59059 100644 --- a/scripts/base/protocols/smtp/dpd.sig +++ b/scripts/base/protocols/smtp/dpd.sig @@ -10,3 +10,4 @@ signature dpd_smtp_server { ip-proto == tcp payload /^[[:space:]]*220[[:space:]-]/ tcp-state responder +} \ No newline at end of file From 8322bbfd620038171f93a0aca09119c406dab221 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Tue, 9 Jul 2013 23:28:09 -0400 Subject: [PATCH 054/118] Small test fixes. 
--- .../canonified_loaded_scripts.log | 5 +++-- testing/btest/core/tunnels/teredo-known-services.test | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log b/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log index 28430aacd8..6d6906d924 100644 --- a/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log +++ b/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log @@ -3,7 +3,7 @@ #empty_field (empty) #unset_field - #path loaded_scripts -#open 2013-07-05-05-21-48 +#open 2013-07-10-03-19-58 #fields name #types string scripts/base/init-bare.bro @@ -191,6 +191,7 @@ scripts/base/init-default.bro scripts/base/protocols/syslog/__load__.bro scripts/base/protocols/syslog/consts.bro scripts/base/protocols/syslog/main.bro + scripts/base/protocols/tunnels/__load__.bro scripts/base/misc/find-checksum-offloading.bro scripts/policy/misc/loaded-scripts.bro -#close 2013-07-05-05-21-48 +#close 2013-07-10-03-19-58 diff --git a/testing/btest/core/tunnels/teredo-known-services.test b/testing/btest/core/tunnels/teredo-known-services.test index d03ef2ab71..da3a538515 100644 --- a/testing/btest/core/tunnels/teredo-known-services.test +++ b/testing/btest/core/tunnels/teredo-known-services.test @@ -1,6 +1,6 @@ -# @TEST-EXEC: bro -r $TRACES/tunnels/false-teredo.pcap base/frameworks/dpd protocols/conn/known-services Tunnel::delay_teredo_confirmation=T "Site::local_nets+={192.168.1.0/24}" +# @TEST-EXEC: bro -r $TRACES/tunnels/false-teredo.pcap base/frameworks/dpd base/protocols/tunnels protocols/conn/known-services Tunnel::delay_teredo_confirmation=T "Site::local_nets+={192.168.1.0/24}" # @TEST-EXEC: test ! -e known_services.log -# @TEST-EXEC: bro -b -r $TRACES/tunnels/false-teredo.pcap base/frameworks/dpd protocols/conn/known-services Tunnel::delay_teredo_confirmation=F "Site::local_nets+={192.168.1.0/24}" +# @TEST-EXEC: bro -b -r $TRACES/tunnels/false-teredo.pcap base/frameworks/dpd base/protocols/tunnels protocols/conn/known-services Tunnel::delay_teredo_confirmation=F "Site::local_nets+={192.168.1.0/24}" # @TEST-EXEC: btest-diff known_services.log # The first case using Tunnel::delay_teredo_confirmation=T doesn't produce From 40201a180e54a560711003f2e65e14be87a7b8e9 Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Tue, 9 Jul 2013 21:00:53 -0700 Subject: [PATCH 055/118] Fixing for unserializion error. Because BloomFilter is a base class, with other classes derived from it, it needs special treatment. --- src/SerialTypes.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/SerialTypes.h b/src/SerialTypes.h index 9e4aef5b3b..85aed10bda 100644 --- a/src/SerialTypes.h +++ b/src/SerialTypes.h @@ -52,8 +52,6 @@ SERIAL_IS(RE_MATCHER, 0x1400) SERIAL_IS(BITVECTOR, 0x1500) SERIAL_IS(COUNTERVECTOR, 0x1600) SERIAL_IS(BLOOMFILTER, 0x1700) -SERIAL_IS(BASICBLOOMFILTER, 0x1800) -SERIAL_IS(COUNTINGBLOOMFILTER, 0x1900) // These are the externally visible types. 
const SerialType SER_NONE = 0; @@ -203,6 +201,11 @@ SERIAL_FUNC(BRO_FUNC, 2) SERIAL_FUNC(DEBUG_FUNC, 3) SERIAL_FUNC(BUILTIN_FUNC, 4) +#define SERIAL_BLOOMFILTER(name, val) SERIAL_CONST(name, val, BLOOMFILTER) +SERIAL_BLOOMFILTER(BLOOMFILTER, 1) +SERIAL_BLOOMFILTER(BASICBLOOMFILTER, 2) +SERIAL_BLOOMFILTER(COUNTINGBLOOMFILTER, 3) + SERIAL_CONST2(ID) SERIAL_CONST2(STATE_ACCESS) SERIAL_CONST2(CASE) @@ -210,8 +213,5 @@ SERIAL_CONST2(LOCATION) SERIAL_CONST2(RE_MATCHER) SERIAL_CONST2(BITVECTOR) SERIAL_CONST2(COUNTERVECTOR) -SERIAL_CONST2(BLOOMFILTER) -SERIAL_CONST2(BASICBLOOMFILTER) -SERIAL_CONST2(COUNTINGBLOOMFILTER) #endif From 446344ae998e8eef30a0f45a05dcea29efe4f032 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Wed, 10 Jul 2013 01:32:59 -0700 Subject: [PATCH 056/118] Add missing include for GCC. --- src/BloomFilter.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/BloomFilter.cc b/src/BloomFilter.cc index a7727630f7..c59092b1e4 100644 --- a/src/BloomFilter.cc +++ b/src/BloomFilter.cc @@ -1,6 +1,7 @@ #include "BloomFilter.h" #include +#include #include "CounterVector.h" #include "Serializer.h" From 0394493faccf3975094208b4142d3c19b3482b4b Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Wed, 10 Jul 2013 11:53:44 -0500 Subject: [PATCH 057/118] const adjustment And fixes compiler warning about overloaded virtual function hiding. --- src/file_analysis/Component.cc | 2 +- src/file_analysis/Component.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/file_analysis/Component.cc b/src/file_analysis/Component.cc index d686918130..99531e40f5 100644 --- a/src/file_analysis/Component.cc +++ b/src/file_analysis/Component.cc @@ -41,7 +41,7 @@ analyzer::Tag Component::Tag() const return tag; } -void Component::Describe(ODesc* d) +void Component::Describe(ODesc* d) const { plugin::Component::Describe(d); d->Add(name); diff --git a/src/file_analysis/Component.h b/src/file_analysis/Component.h index 5ec97f2e0c..8b79436991 100644 --- a/src/file_analysis/Component.h +++ b/src/file_analysis/Component.h @@ -90,7 +90,7 @@ public: * Generates a human-readable description of the component's main * parameters. This goes into the output of \c "bro -NN". */ - virtual void Describe(ODesc* d); + virtual void Describe(ODesc* d) const; Component& operator=(const Component& other); From 99d604c9b565d18a73c12b91512aebebade7d57d Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Wed, 10 Jul 2013 14:06:51 -0500 Subject: [PATCH 058/118] Make the custom libmagic database a git submodule. The magic files couldn't be in the root of that repo or else libmagic would abort when it ran in to the .git* files and tried to treat them like magic files, too. 
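One practical consequence for existing checkouts: after this change the magic database is only present once the new submodule has been initialized. A typical sequence would be (standard git commands, not specific to this patch; the clone URL is a placeholder):

    # In an existing working copy, after fetching this commit:
    git submodule update --init magic

    # Fresh clones should pull all submodules up front:
    git clone --recursive <bro-repository-url>
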
--- .gitmodules | 3 + CMakeLists.txt | 7 +- magic | 1 + magic/COPYING | 29 ---- magic/animation | 208 ------------------------ magic/archive | 242 ---------------------------- magic/assembler | 19 --- magic/audio | 149 ----------------- magic/c-lang | 47 ------ magic/cafebabe | 31 ---- magic/commands | 82 ---------- magic/compress | 77 --------- magic/database | 47 ------ magic/diff | 25 --- magic/elf | 43 ----- magic/epoc | 34 ---- magic/filesystems | 12 -- magic/flash | 18 --- magic/fonts | 32 ---- magic/fortran | 7 - magic/frame | 31 ---- magic/gimp | 13 -- magic/gnu | 23 --- magic/gnumeric | 8 - magic/icc | 51 ------ magic/iff | 21 --- magic/images | 255 ------------------------------ magic/java | 16 -- magic/javascript | 17 -- magic/jpeg | 31 ---- magic/kde | 11 -- magic/kml | 30 ---- magic/linux | 22 --- magic/lisp | 42 ----- magic/lua | 17 -- magic/m4 | 7 - magic/macintosh | 21 --- magic/mail.news | 35 ---- magic/make | 16 -- magic/marc21 | 29 ---- magic/matroska | 17 -- magic/misctools | 9 -- magic/msdos | 369 ------------------------------------------- magic/neko | 12 -- magic/pascal | 11 -- magic/pdf | 8 - magic/perl | 26 --- magic/pgp | 27 ---- magic/pkgadd | 7 - magic/printer | 14 -- magic/python | 46 ------ magic/riff | 36 ----- magic/rpm | 12 -- magic/rtf | 9 -- magic/ruby | 28 ---- magic/sc | 7 - magic/sgml | 82 ---------- magic/sniffer | 17 -- magic/tcl | 23 --- magic/tex | 56 ------- magic/troff | 22 --- magic/vorbis | 26 --- magic/warc | 14 -- magic/windows | 19 --- magic/wordprocessors | 43 ----- magic/xwindows | 11 -- 66 files changed, 7 insertions(+), 2753 deletions(-) create mode 160000 magic delete mode 100644 magic/COPYING delete mode 100644 magic/animation delete mode 100644 magic/archive delete mode 100644 magic/assembler delete mode 100644 magic/audio delete mode 100644 magic/c-lang delete mode 100644 magic/cafebabe delete mode 100644 magic/commands delete mode 100644 magic/compress delete mode 100644 magic/database delete mode 100644 magic/diff delete mode 100644 magic/elf delete mode 100644 magic/epoc delete mode 100644 magic/filesystems delete mode 100644 magic/flash delete mode 100644 magic/fonts delete mode 100644 magic/fortran delete mode 100644 magic/frame delete mode 100644 magic/gimp delete mode 100644 magic/gnu delete mode 100644 magic/gnumeric delete mode 100644 magic/icc delete mode 100644 magic/iff delete mode 100644 magic/images delete mode 100644 magic/java delete mode 100644 magic/javascript delete mode 100644 magic/jpeg delete mode 100644 magic/kde delete mode 100644 magic/kml delete mode 100644 magic/linux delete mode 100644 magic/lisp delete mode 100644 magic/lua delete mode 100644 magic/m4 delete mode 100644 magic/macintosh delete mode 100644 magic/mail.news delete mode 100644 magic/make delete mode 100644 magic/marc21 delete mode 100644 magic/matroska delete mode 100644 magic/misctools delete mode 100644 magic/msdos delete mode 100644 magic/neko delete mode 100644 magic/pascal delete mode 100644 magic/pdf delete mode 100644 magic/perl delete mode 100644 magic/pgp delete mode 100644 magic/pkgadd delete mode 100644 magic/printer delete mode 100644 magic/python delete mode 100644 magic/riff delete mode 100644 magic/rpm delete mode 100644 magic/rtf delete mode 100644 magic/ruby delete mode 100644 magic/sc delete mode 100644 magic/sgml delete mode 100644 magic/sniffer delete mode 100644 magic/tcl delete mode 100644 magic/tex delete mode 100644 magic/troff delete mode 100644 magic/vorbis delete mode 100644 magic/warc delete mode 100644 
magic/windows delete mode 100644 magic/wordprocessors delete mode 100644 magic/xwindows diff --git a/.gitmodules b/.gitmodules index 95053091cf..2ede715f49 100644 --- a/.gitmodules +++ b/.gitmodules @@ -16,3 +16,6 @@ [submodule "cmake"] path = cmake url = git://git.bro-ids.org/cmake +[submodule "magic"] + path = magic + url = git://git.bro.org/bromagic diff --git a/CMakeLists.txt b/CMakeLists.txt index b95b637770..0f64f304b8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,7 +18,7 @@ get_filename_component(BRO_SCRIPT_INSTALL_PATH ${BRO_SCRIPT_INSTALL_PATH} ABSOLUTE) set(BRO_MAGIC_INSTALL_PATH ${BRO_ROOT_DIR}/share/bro/magic) -set(BRO_MAGIC_SOURCE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/magic) +set(BRO_MAGIC_SOURCE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/magic/database) configure_file(bro-path-dev.in ${CMAKE_CURRENT_BINARY_DIR}/bro-path-dev) file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/bro-path-dev.sh @@ -201,9 +201,8 @@ CheckOptionalBuildSources(aux/broctl Broctl INSTALL_BROCTL) CheckOptionalBuildSources(aux/bro-aux Bro-Aux INSTALL_AUX_TOOLS) CheckOptionalBuildSources(aux/broccoli Broccoli INSTALL_BROCCOLI) -install(DIRECTORY ./magic/ DESTINATION ${BRO_MAGIC_INSTALL_PATH} FILES_MATCHING - PATTERN "COPYING" EXCLUDE - PATTERN "*" +install(DIRECTORY ./magic/database/ + DESTINATION ${BRO_MAGIC_INSTALL_PATH} ) ######################################################################## diff --git a/magic b/magic new file mode 160000 index 0000000000..e87fe13a7b --- /dev/null +++ b/magic @@ -0,0 +1 @@ +Subproject commit e87fe13a7b776182ffc8c75076d42702f5c28fed diff --git a/magic/COPYING b/magic/COPYING deleted file mode 100644 index 7d2bf1e711..0000000000 --- a/magic/COPYING +++ /dev/null @@ -1,29 +0,0 @@ -# $File: LEGAL.NOTICE,v 1.15 2006/05/03 18:48:33 christos Exp $ -# Copyright (c) Ian F. Darwin 1986, 1987, 1989, 1990, 1991, 1992, 1994, 1995. -# Software written by Ian F. Darwin and others; -# maintained 1994- Christos Zoulas. -# -# This software is not subject to any export provision of the United States -# Department of Commerce, and may be exported to any country or planet. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# 1. Redistributions of source code must retain the above copyright -# notice immediately at the beginning of the file, without modification, -# this list of conditions, and the following disclaimer. -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND -# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR -# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -# SUCH DAMAGE. 
diff --git a/magic/animation b/magic/animation deleted file mode 100644 index 0cec03d511..0000000000 --- a/magic/animation +++ /dev/null @@ -1,208 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File: animation,v 1.47 2013/02/06 14:18:52 christos Exp $ -# animation: file(1) magic for animation/movie formats -# -# animation formats -# MPEG, FLI, DL originally from vax@ccwf.cc.utexas.edu (VaX#n8) -# FLC, SGI, Apple originally from Daniel Quinlan (quinlan@yggdrasil.com) - -# SGI and Apple formats -0 string MOVI Silicon Graphics movie file -!:mime video/x-sgi-movie -4 string moov Apple QuickTime -!:mime video/quicktime -4 string mdat Apple QuickTime movie (unoptimized) -!:mime video/quicktime -#4 string wide Apple QuickTime movie (unoptimized) -#!:mime video/quicktime -#4 string skip Apple QuickTime movie (modified) -#!:mime video/quicktime -#4 string free Apple QuickTime movie (modified) -#!:mime video/quicktime -4 string idsc Apple QuickTime image (fast start) -!:mime image/x-quicktime -#4 string idat Apple QuickTime image (unoptimized) -#!:mime image/x-quicktime -4 string pckg Apple QuickTime compressed archive -!:mime application/x-quicktime-player -4 string/W jP JPEG 2000 image -!:mime image/jp2 -4 string ftyp ISO Media ->8 string isom \b, MPEG v4 system, version 1 -!:mime video/mp4 ->8 string mp41 \b, MPEG v4 system, version 1 -!:mime video/mp4 ->8 string mp42 \b, MPEG v4 system, version 2 -!:mime video/mp4 ->8 string/W jp2 \b, JPEG 2000 -!:mime image/jp2 ->8 string 3ge \b, MPEG v4 system, 3GPP -!:mime video/3gpp ->8 string 3gg \b, MPEG v4 system, 3GPP -!:mime video/3gpp ->8 string 3gp \b, MPEG v4 system, 3GPP -!:mime video/3gpp ->8 string 3gs \b, MPEG v4 system, 3GPP -!:mime video/3gpp ->8 string 3g2 \b, MPEG v4 system, 3GPP2 -!:mime video/3gpp2 ->8 string mmp4 \b, MPEG v4 system, 3GPP Mobile -!:mime video/mp4 ->8 string avc1 \b, MPEG v4 system, 3GPP JVT AVC -!:mime video/3gpp ->8 string/W M4A \b, MPEG v4 system, iTunes AAC-LC -!:mime audio/mp4 ->8 string/W M4V \b, MPEG v4 system, iTunes AVC-LC -!:mime video/mp4 ->8 string/W qt \b, Apple QuickTime movie -!:mime video/quicktime - -# MPEG sequences -# Scans for all common MPEG header start codes -0 belong&0xFFFFFF00 0x00000100 ->3 byte 0xBA MPEG sequence -!:mime video/mpeg -# GRR too general as it catches also FoxPro Memo example NG.FPT ->3 byte 0xB0 MPEG sequence, v4 -!:mime video/mpeg4-generic ->3 byte 0xB5 MPEG sequence, v4 -!:mime video/mpeg4-generic ->3 byte 0xB3 MPEG sequence -!:mime video/mpeg - -# MPEG ADTS Audio (*.mpx/mxa/aac) -# from dreesen@math.fu-berlin.de -# modified to fully support MPEG ADTS - -# MP3, M1A -# modified by Joerg Jenderek -# GRR the original test are too common for many DOS files -# so don't accept as MP3 until we've tested the rate -0 beshort&0xFFFE 0xFFFA -# rates ->2 byte&0xF0 0x10 MPEG ADTS, layer III, v1, 32 kbps -!:mime audio/mpeg ->2 byte&0xF0 0x20 MPEG ADTS, layer III, v1, 40 kbps -!:mime audio/mpeg ->2 byte&0xF0 0x30 MPEG ADTS, layer III, v1, 48 kbps -!:mime audio/mpeg ->2 byte&0xF0 0x40 MPEG ADTS, layer III, v1, 56 kbps -!:mime audio/mpeg ->2 byte&0xF0 0x50 MPEG ADTS, layer III, v1, 64 kbps -!:mime audio/mpeg ->2 byte&0xF0 0x60 MPEG ADTS, layer III, v1, 80 kbps -!:mime audio/mpeg ->2 byte&0xF0 0x70 MPEG ADTS, layer III, v1, 96 kbps -!:mime audio/mpeg ->2 byte&0xF0 0x80 MPEG ADTS, layer III, v1, 112 kbps -!:mime audio/mpeg ->2 byte&0xF0 0x90 MPEG ADTS, layer III, v1, 128 kbps -!:mime 
audio/mpeg ->2 byte&0xF0 0xA0 MPEG ADTS, layer III, v1, 160 kbps -!:mime audio/mpeg ->2 byte&0xF0 0xB0 MPEG ADTS, layer III, v1, 192 kbps -!:mime audio/mpeg ->2 byte&0xF0 0xC0 MPEG ADTS, layer III, v1, 224 kbps -!:mime audio/mpeg ->2 byte&0xF0 0xD0 MPEG ADTS, layer III, v1, 256 kbps -!:mime audio/mpeg ->2 byte&0xF0 0xE0 MPEG ADTS, layer III, v1, 320 kbps -!:mime audio/mpeg - -# MP2, M1A -0 beshort&0xFFFE 0xFFFC MPEG ADTS, layer II, v1 -!:mime audio/mpeg - -# MP3, M2A -0 beshort&0xFFFE 0xFFF2 MPEG ADTS, layer III, v2 -!:mime audio/mpeg - -# MPA, M2A -0 beshort&0xFFFE 0xFFF6 MPEG ADTS, layer I, v2 -!:mime audio/mpeg - -# MP3, M25A -0 beshort&0xFFFE 0xFFE2 MPEG ADTS, layer III, v2.5 -!:mime audio/mpeg - -# Stored AAC streams (instead of the MP4 format) -0 string ADIF MPEG ADIF, AAC -!:mime audio/x-hx-aac-adif - -# Live or stored single AAC stream (used with MPEG-2 systems) -0 beshort&0xFFF6 0xFFF0 MPEG ADTS, AAC -!:mime audio/x-hx-aac-adts - -# Live MPEG-4 audio streams (instead of RTP FlexMux) -0 beshort&0xFFE0 0x56E0 MPEG-4 LOAS -!:mime audio/x-mp4a-latm - -# This magic isn't strong enough (matches plausible ISO-8859-1 text) -#0 beshort 0x4DE1 MPEG-4 LO-EP audio stream -#!:mime audio/x-mp4a-latm - -# Summary: FLI animation format -# Created by: Daniel Quinlan -# Modified by (1): Abel Cheung (avoid over-generic detection) -4 leshort 0xAF11 -# standard FLI always has 320x200 resolution and 8 bit color ->8 leshort 320 ->>10 leshort 200 ->>>12 leshort 8 FLI animation, 320x200x8 -!:mime video/x-fli - -# Summary: FLC animation format -# Created by: Daniel Quinlan -# Modified by (1): Abel Cheung (avoid over-generic detection) -4 leshort 0xAF12 -# standard FLC always use 8 bit color ->12 leshort 8 FLC animation -!:mime video/x-flc - -# Microsoft Advanced Streaming Format (ASF) -0 belong 0x3026b275 Microsoft ASF -!:mime video/x-ms-asf - -# MNG Video Format, -0 string \x8aMNG MNG video data, -!:mime video/x-mng - -# JNG Video Format, -0 string \x8bJNG JNG video data, -!:mime video/x-jng - -# VRML (Virtual Reality Modelling Language) -0 string/w #VRML\ V1.0\ ascii VRML 1 file -!:mime model/vrml -0 string/w #VRML\ V2.0\ utf8 ISO/IEC 14772 VRML 97 file -!:mime model/vrml - -# X3D (Extensible 3D) [http://www.web3d.org/specifications/x3d-3.0.dtd] -# From Michel Briand -0 string/t \20 search/1000/cw \4 byte &0x40 -!:mime video/mp2p ->4 byte ^0x40 -!:mime video/mpeg -0 belong 0x000001BB -!:mime video/mpeg -0 belong 0x000001B0 -!:mime video/mp4v-es -0 belong 0x000001B5 -!:mime video/mp4v-es -0 belong 0x000001B3 -!:mime video/mpv -0 belong&0xFF5FFF1F 0x47400010 -!:mime video/mp2t -0 belong 0x00000001 ->4 byte&0x1F 0x07 -!:mime video/h264 diff --git a/magic/archive b/magic/archive deleted file mode 100644 index 35cbef4012..0000000000 --- a/magic/archive +++ /dev/null @@ -1,242 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File: archive,v 1.78 2013/02/06 14:18:52 christos Exp $ -# archive: file(1) magic for archive formats (see also "msdos" for self- -# extracting compressed archives) -# -# cpio, ar, arc, arj, hpack, lha/lharc, rar, squish, uc2, zip, zoo, etc. -# pre-POSIX "tar" archives are handled in the C code. 
- -# POSIX tar archives -257 string ustar\0 POSIX tar archive -!:mime application/x-tar # encoding: posix -257 string ustar\040\040\0 GNU tar archive -!:mime application/x-tar # encoding: gnu - -# cpio archives -# -# Yes, the top two "cpio archive" formats *are* supposed to just be "short". -# The idea is to indicate archives produced on machines with the same -# byte order as the machine running "file" with "cpio archive", and -# to indicate archives produced on machines with the opposite byte order -# from the machine running "file" with "byte-swapped cpio archive". -# -# The SVR4 "cpio(4)" hints that there are additional formats, but they -# are defined as "short"s; I think all the new formats are -# character-header formats and thus are strings, not numbers. -0 short 070707 cpio archive -!:mime application/x-cpio -0 short 0143561 byte-swapped cpio archive -!:mime application/x-cpio # encoding: swapped - -# -# System V Release 1 portable(?) archive format. -# -0 string = System V Release 1 ar archive -!:mime application/x-archive - -# -# Debian package; it's in the portable archive format, and needs to go -# before the entry for regular portable archives, as it's recognized as -# a portable archive whose first member has a name beginning with -# "debian". -# -0 string =!\ndebian -!:mime application/x-debian-package - -# -# MIPS archive; they're in the portable archive format, and need to go -# before the entry for regular portable archives, as it's recognized as -# a portable archive whose first member has a name beginning with -# "__________E". -# -0 string =!\n__________E MIPS archive -!:mime application/x-archive - -# -# BSD/SVR2-and-later portable archive formats. -# -0 string =! current ar archive -!:mime application/x-archive - -# ARC archiver, from Daniel Quinlan (quinlan@yggdrasil.com) -# -# The first byte is the magic (0x1a), byte 2 is the compression type for -# the first file (0x01 through 0x09), and bytes 3 to 15 are the MS-DOS -# filename of the first file (null terminated). Since some types collide -# we only test some types on basis of frequency: 0x08 (83%), 0x09 (5%), -# 0x02 (5%), 0x03 (3%), 0x04 (2%), 0x06 (2%). 0x01 collides with terminfo. 
-0 lelong&0x8080ffff 0x0000081a ARC archive data, dynamic LZW -!:mime application/x-arc -0 lelong&0x8080ffff 0x0000091a ARC archive data, squashed -!:mime application/x-arc -0 lelong&0x8080ffff 0x0000021a ARC archive data, uncompressed -!:mime application/x-arc -0 lelong&0x8080ffff 0x0000031a ARC archive data, packed -!:mime application/x-arc -0 lelong&0x8080ffff 0x0000041a ARC archive data, squeezed -!:mime application/x-arc -0 lelong&0x8080ffff 0x0000061a ARC archive data, crunched -!:mime application/x-arc -# [JW] stuff taken from idarc, obviously ARC successors: -0 lelong&0x8080ffff 0x00000a1a PAK archive data -!:mime application/x-arc -0 lelong&0x8080ffff 0x0000141a ARC+ archive data -!:mime application/x-arc -0 lelong&0x8080ffff 0x0000481a HYP archive data -!:mime application/x-arc - -# ARJ archiver (jason@jarthur.Claremont.EDU) -0 leshort 0xea60 ARJ archive data -!:mime application/x-arj - -# LHARC/LHA archiver (Greg Roelofs, newt@uchicago.edu) -2 string -lh0- LHarc 1.x/ARX archive data [lh0] -!:mime application/x-lharc -2 string -lh1- LHarc 1.x/ARX archive data [lh1] -!:mime application/x-lharc -2 string -lz4- LHarc 1.x archive data [lz4] -!:mime application/x-lharc -2 string -lz5- LHarc 1.x archive data [lz5] -!:mime application/x-lharc -# [never seen any but the last; -lh4- reported in comp.compression:] -2 string -lzs- LHa/LZS archive data [lzs] -!:mime application/x-lha -2 string -lh\40- LHa 2.x? archive data [lh ] -!:mime application/x-lha -2 string -lhd- LHa 2.x? archive data [lhd] -!:mime application/x-lha -2 string -lh2- LHa 2.x? archive data [lh2] -!:mime application/x-lha -2 string -lh3- LHa 2.x? archive data [lh3] -!:mime application/x-lha -2 string -lh4- LHa (2.x) archive data [lh4] -!:mime application/x-lha -2 string -lh5- LHa (2.x) archive data [lh5] -!:mime application/x-lha -2 string -lh6- LHa (2.x) archive data [lh6] -!:mime application/x-lha -2 string -lh7- LHa (2.x)/LHark archive data [lh7] -!:mime application/x-lha - -# RAR archiver (Greg Roelofs, newt@uchicago.edu) -0 string Rar! RAR archive data, -!:mime application/x-rar - -# PKZIP multi-volume archive -0 string PK\x07\x08PK\x03\x04 Zip multi-volume archive data, at least PKZIP v2.50 to extract -!:mime application/zip - -# Zip archives (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu) -0 string PK\003\004 - -# Specialised zip formats which start with a member named 'mimetype' -# (stored uncompressed, with no 'extra field') containing the file's MIME type. -# Check for have 8-byte name, 0-byte extra field, name "mimetype", and -# contents starting with "application/": ->26 string \x8\0\0\0mimetypeapplication/ - -# OpenDocument formats (for OpenOffice 2.x / StarOffice >= 8) -# http://lists.oasis-open.org/archives/office/200505/msg00006.html -# (mimetype contains "application/vnd.oasis.opendocument.") ->>50 string vnd.oasis.opendocument. 
OpenDocument ->>>73 string text ->>>>77 byte !0x2d Text -!:mime application/vnd.oasis.opendocument.text ->>>>77 string -template Text Template -!:mime application/vnd.oasis.opendocument.text-template ->>>>77 string -web HTML Document Template -!:mime application/vnd.oasis.opendocument.text-web ->>>>77 string -master Master Document -!:mime application/vnd.oasis.opendocument.text-master ->>>73 string graphics ->>>>81 byte !0x2d Drawing -!:mime application/vnd.oasis.opendocument.graphics ->>>>81 string -template Template -!:mime application/vnd.oasis.opendocument.graphics-template ->>>73 string presentation ->>>>85 byte !0x2d Presentation -!:mime application/vnd.oasis.opendocument.presentation ->>>>85 string -template Template -!:mime application/vnd.oasis.opendocument.presentation-template ->>>73 string spreadsheet ->>>>84 byte !0x2d Spreadsheet -!:mime application/vnd.oasis.opendocument.spreadsheet ->>>>84 string -template Template -!:mime application/vnd.oasis.opendocument.spreadsheet-template ->>>73 string chart ->>>>78 byte !0x2d Chart -!:mime application/vnd.oasis.opendocument.chart ->>>>78 string -template Template -!:mime application/vnd.oasis.opendocument.chart-template ->>>73 string formula ->>>>80 byte !0x2d Formula -!:mime application/vnd.oasis.opendocument.formula ->>>>80 string -template Template -!:mime application/vnd.oasis.opendocument.formula-template ->>>73 string database Database -!:mime application/vnd.oasis.opendocument.database ->>>73 string image ->>>>78 byte !0x2d Image -!:mime application/vnd.oasis.opendocument.image ->>>>78 string -template Template -!:mime application/vnd.oasis.opendocument.image-template - -# EPUB (OEBPS) books using OCF (OEBPS Container Format) -# http://www.idpf.org/ocf/ocf1.0/download/ocf10.htm, section 4. -# From: Ralf Brown ->0x1E string mimetypeapplication/epub+zip EPUB document -!:mime application/epub+zip - -# Catch other ZIP-with-mimetype formats -# In a ZIP file, the bytes immediately after a member's contents are -# always "PK". The 2 regex rules here print the "mimetype" member's -# contents up to the first 'P'. Luckily, most MIME types don't contain -# any capital 'P's. This is a kludge. -# (mimetype contains "application/") ->>50 string !epub+zip ->>>50 string !vnd.oasis.opendocument. ->>>>50 string !vnd.sun.xml. ->>>>>50 string !vnd.kde. ->>>>>>38 regex [!-OQ-~]+ Zip data (MIME type "%s"?) -!:mime application/zip -# (mimetype contents other than "application/*") ->26 string \x8\0\0\0mimetype ->>38 string !application/ ->>>38 regex [!-OQ-~]+ Zip data (MIME type "%s"?) 
-!:mime application/zip - -# Java Jar files ->(26.s+30) leshort 0xcafe Java Jar file data (zip) -!:mime application/jar - -# Generic zip archives (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu) -# Next line excludes specialized formats: ->(26.s+30) leshort !0xcafe ->>26 string !\x8\0\0\0mimetype Zip archive data -!:mime application/zip - -# Zoo archiver -20 lelong 0xfdc4a7dc Zoo archive data -!:mime application/x-zoo - -# Shell archives -10 string #\ This\ is\ a\ shell\ archive shell archive text -!:mime application/octet-stream - -# Felix von Leitner -0 string d8:announce BitTorrent file -!:mime application/x-bittorrent - -# EET archive -# From: Tilman Sauerbeck -0 belong 0x1ee7ff00 EET archive -!:mime application/x-eet - -# Symbian installation files -# http://www.thouky.co.uk/software/psifs/sis.html -# http://developer.symbian.com/main/downloads/papers/SymbianOSv91/softwareinstallsis.pdf -8 lelong 0x10000419 Symbian installation file -!:mime application/vnd.symbian.install -0 lelong 0x10201A7A Symbian installation file (Symbian OS 9.x) -!:mime x-epoc/x-sisx-app diff --git a/magic/assembler b/magic/assembler deleted file mode 100644 index 242b6e19e2..0000000000 --- a/magic/assembler +++ /dev/null @@ -1,19 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File: assembler,v 1.3 2013/01/04 17:23:28 christos Exp $ -# make: file(1) magic for assembler source -# -0 regex \^[\020\t]*\\.asciiz assembler source text -!:mime text/x-asm -0 regex \^[\020\t]*\\.byte assembler source text -!:mime text/x-asm -0 regex \^[\020\t]*\\.even assembler source text -!:mime text/x-asm -0 regex \^[\020\t]*\\.globl assembler source text -!:mime text/x-asm -0 regex \^[\020\t]*\\.text assembler source text -!:mime text/x-asm -0 regex \^[\020\t]*\\.file assembler source text -!:mime text/x-asm -0 regex \^[\020\t]*\\.type assembler source text -!:mime text/x-asm diff --git a/magic/audio b/magic/audio deleted file mode 100644 index 75a9dc536c..0000000000 --- a/magic/audio +++ /dev/null @@ -1,149 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File: audio,v 1.65 2012/10/31 13:38:40 christos Exp $ -# audio: file(1) magic for sound formats (see also "iff") -# -# Jan Nicolai Langfeldt (janl@ifi.uio.no), Dan Quinlan (quinlan@yggdrasil.com), -# and others -# - -# Sun/NeXT audio data -0 string .snd Sun/NeXT audio data: ->12 belong 1 8-bit ISDN mu-law, -!:mime audio/basic ->12 belong 2 8-bit linear PCM [REF-PCM], -!:mime audio/basic ->12 belong 3 16-bit linear PCM, -!:mime audio/basic ->12 belong 4 24-bit linear PCM, -!:mime audio/basic ->12 belong 5 32-bit linear PCM, -!:mime audio/basic ->12 belong 6 32-bit IEEE floating point, -!:mime audio/basic ->12 belong 7 64-bit IEEE floating point, -!:mime audio/basic ->12 belong 23 8-bit ISDN mu-law compressed (CCITT G.721 ADPCM voice enc.), -!:mime audio/x-adpcm - -# DEC systems (e.g. 
DECstation 5000) use a variant of the Sun/NeXT format -# that uses little-endian encoding and has a different magic number -0 lelong 0x0064732E DEC audio data: ->12 lelong 1 8-bit ISDN mu-law, -!:mime audio/x-dec-basic ->12 lelong 2 8-bit linear PCM [REF-PCM], -!:mime audio/x-dec-basic ->12 lelong 3 16-bit linear PCM, -!:mime audio/x-dec-basic ->12 lelong 4 24-bit linear PCM, -!:mime audio/x-dec-basic ->12 lelong 5 32-bit linear PCM, -!:mime audio/x-dec-basic ->12 lelong 6 32-bit IEEE floating point, -!:mime audio/x-dec-basic ->12 lelong 7 64-bit IEEE floating point, -!:mime audio/x-dec-basic ->12 lelong 23 8-bit ISDN mu-law compressed (CCITT G.721 ADPCM voice enc.), -!:mime audio/x-dec-basic - -# Creative Labs AUDIO stuff -0 string MThd Standard MIDI data -!:mime audio/midi - -0 string CTMF Creative Music (CMF) data -!:mime audio/x-unknown -0 string SBI SoundBlaster instrument data -!:mime audio/x-unknown -0 string Creative\ Voice\ File Creative Labs voice data -!:mime audio/x-unknown - -# Real Audio (Magic .ra\0375) -0 belong 0x2e7261fd RealAudio sound file -!:mime audio/x-pn-realaudio -0 string .RMF\0\0\0 RealMedia file -!:mime application/vnd.rn-realmedia - -# mime types according to http://www.geocities.com/nevilo/mod.htm: -# audio/it .it -# audio/x-zipped-it .itz -# audio/xm fasttracker modules -# audio/x-s3m screamtracker modules -# audio/s3m screamtracker modules -# audio/x-zipped-mod mdz -# audio/mod mod -# audio/x-mod All modules (mod, s3m, 669, mtm, med, xm, it, mdz, stm, itz, xmz, s3z) - -# -# Taken from loader code from mikmod version 2.14 -# by Steve McIntyre (stevem@chiark.greenend.org.uk) -# added title printing on 2003-06-24 -0 string MAS_UTrack_V00 ->14 string >/0 ultratracker V1.%.1s module sound data -!:mime audio/x-mod -#audio/x-tracker-module - -0 string Extended\ Module: Fasttracker II module sound data -!:mime audio/x-mod -#audio/x-tracker-module - -21 string/c =!SCREAM! Screamtracker 2 module sound data -!:mime audio/x-mod -#audio/x-screamtracker-module -21 string BMOD2STM Screamtracker 2 module sound data -!:mime audio/x-mod -#audio/x-screamtracker-module -1080 string M.K. 4-channel Protracker module sound data -!:mime audio/x-mod -#audio/x-protracker-module -1080 string M!K! 4-channel Protracker module sound data -!:mime audio/x-mod -#audio/x-protracker-module -1080 string FLT4 4-channel Startracker module sound data -!:mime audio/x-mod -#audio/x-startracker-module -1080 string FLT8 8-channel Startracker module sound data -!:mime audio/x-mod -#audio/x-startracker-module -1080 string 4CHN 4-channel Fasttracker module sound data -!:mime audio/x-mod -#audio/x-fasttracker-module -1080 string 6CHN 6-channel Fasttracker module sound data -!:mime audio/x-mod -#audio/x-fasttracker-module -1080 string 8CHN 8-channel Fasttracker module sound data -!:mime audio/x-mod -#audio/x-fasttracker-module -1080 string CD81 8-channel Octalyser module sound data -!:mime audio/x-mod -#audio/x-octalysertracker-module -1080 string OKTA 8-channel Octalyzer module sound data -!:mime audio/x-mod -#audio/x-octalysertracker-module -# Not good enough. 
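For illustration only (this sketch is not part of the deleted magic files): the module-tracker entries above all key on a four-byte tag at the fixed offset 1080, so the whole family reduces to a single seek-and-compare. A minimal Python sketch, with the tag table mirroring a few of the entries listed above:

    # Sketch: classify tracker modules by the 4-byte tag at offset 1080,
    # mirroring the "1080 string ..." entries above (all audio/x-mod).
    TRACKER_TAGS = {
        b"M.K.": "4-channel Protracker module",
        b"M!K!": "4-channel Protracker module",
        b"FLT4": "4-channel Startracker module",
        b"FLT8": "8-channel Startracker module",
        b"4CHN": "4-channel Fasttracker module",
        b"6CHN": "6-channel Fasttracker module",
        b"8CHN": "8-channel Fasttracker module",
    }

    def sniff_tracker_module(path):
        """Return a description for known module tags, else None."""
        with open(path, "rb") as f:
            f.seek(1080)
            tag = f.read(4)
        return TRACKER_TAGS.get(tag)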
-#1082 string CH -#>1080 string >/0 %.2s-channel Fasttracker "oktalyzer" module sound data -1080 string 16CN 16-channel Taketracker module sound data -!:mime audio/x-mod -#audio/x-taketracker-module -1080 string 32CN 32-channel Taketracker module sound data -!:mime audio/x-mod -#audio/x-taketracker-module - -# Impulse tracker module (audio/x-it) -0 string IMPM Impulse Tracker module sound data - -!:mime audio/x-mod - -# Free lossless audio codec -# From: Przemyslaw Augustyniak -0 string fLaC FLAC audio bitstream data -!:mime audio/x-flac - -# Monkey's Audio compressed audio format (.ape) -# From danny.milo@gmx.net (Danny Milosavljevic) -# New version from Abel Cheung -0 string MAC\040 Monkey's Audio compressed format -!:mime audio/x-ape - -# musepak support From: "Jiri Pejchal" -0 string MP+ Musepack audio -!:mime audio/x-musepack diff --git a/magic/c-lang b/magic/c-lang deleted file mode 100644 index 525dc6b599..0000000000 --- a/magic/c-lang +++ /dev/null @@ -1,47 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File: c-lang,v 1.16 2011/12/09 08:02:16 rrt Exp $ -# c-lang: file(1) magic for C and related languages programs -# - -# BCPL -0 search/8192 "libhdr" BCPL source text -!:mime text/x-bcpl -0 search/8192 "LIBHDR" BCPL source text -!:mime text/x-bcpl - -# C -0 regex \^#include C source text -!:mime text/x-c -0 regex \^char C source text -!:mime text/x-c -0 regex \^double C source text -!:mime text/x-c -0 regex \^extern C source text -!:mime text/x-c -0 regex \^float C source text -!:mime text/x-c -0 regex \^struct C source text -!:mime text/x-c -0 regex \^union C source text -!:mime text/x-c -0 search/8192 main( C source text -!:mime text/x-c - -# C++ -# The strength of these rules is increased so they beat the C rules above -0 regex \^template C++ source text -!:strength + 5 -!:mime text/x-c++ -0 regex \^virtual C++ source text -!:strength + 5 -!:mime text/x-c++ -0 regex \^class C++ source text -!:strength + 5 -!:mime text/x-c++ -0 regex \^public: C++ source text -!:strength + 5 -!:mime text/x-c++ -0 regex \^private: C++ source text -!:strength + 5 -!:mime text/x-c++ diff --git a/magic/cafebabe b/magic/cafebabe deleted file mode 100644 index 29fefd5f1e..0000000000 --- a/magic/cafebabe +++ /dev/null @@ -1,31 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File: cafebabe,v 1.13 2013/02/26 21:04:38 christos Exp $ -# Cafe Babes unite! -# -# Since Java bytecode and Mach-O universal binaries have the same magic number, -# the test must be performed in the same "magic" sequence to get both right. -# The long at offset 4 in a Mach-O universal binary tells the number of -# architectures; the short at offset 4 in a Java bytecode file is the JVM minor -# version and the short at offset 6 is the JVM major version. Since there are only -# only 18 labeled Mach-O architectures at current, and the first released -# Java class format was version 43.0, we can safely choose any number -# between 18 and 39 to test the number of architectures against -# (and use as a hack). Let's not use 18, because the Mach-O people -# might add another one or two as time goes by... -# -### JAVA START ### -0 belong 0xcafebabe -!:mime application/x-java-applet - -0 belong 0xcafed00d JAR compressed with pack200, ->5 byte x version %d. 
->4 byte x \b%d -!:mime application/x-java-pack200 - -0 belong 0xcafed00d JAR compressed with pack200, ->5 byte x version %d. ->4 byte x \b%d -!:mime application/x-java-pack200 - -### JAVA END ### diff --git a/magic/commands b/magic/commands deleted file mode 100644 index 6ad7699c5e..0000000000 --- a/magic/commands +++ /dev/null @@ -1,82 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File: commands,v 1.44 2013/02/05 15:20:47 christos Exp $ -# commands: file(1) magic for various shells and interpreters -# -#0 string/w : shell archive or script for antique kernel text -0 string/wt #!\ /bin/sh POSIX shell script text executable -!:mime text/x-shellscript -0 string/wt #!\ /bin/csh C shell script text executable -!:mime text/x-shellscript -# korn shell magic, sent by George Wu, gwu@clyde.att.com -0 string/wt #!\ /bin/ksh Korn shell script text executable -!:mime text/x-shellscript -0 string/wt #!\ /bin/tcsh Tenex C shell script text executable -!:mime text/x-shellscript -0 string/wt #!\ /usr/bin/tcsh Tenex C shell script text executable -!:mime text/x-shellscript -0 string/wt #!\ /usr/local/tcsh Tenex C shell script text executable -!:mime text/x-shellscript -0 string/wt #!\ /usr/local/bin/tcsh Tenex C shell script text executable -!:mime text/x-shellscript - -# -# zsh/ash/ae/nawk/gawk magic from cameron@cs.unsw.oz.au (Cameron Simpson) -0 string/wt #!\ /bin/zsh Paul Falstad's zsh script text executable -!:mime text/x-shellscript -0 string/wt #!\ /usr/bin/zsh Paul Falstad's zsh script text executable -!:mime text/x-shellscript -0 string/wt #!\ /usr/local/bin/zsh Paul Falstad's zsh script text executable -!:mime text/x-shellscript -0 string/wt #!\ /usr/local/bin/ash Neil Brown's ash script text executable -!:mime text/x-shellscript -0 string/wt #!\ /usr/local/bin/ae Neil Brown's ae script text executable -!:mime text/x-shellscript -0 string/wt #!\ /bin/nawk new awk script text executable -!:mime text/x-nawk -0 string/wt #!\ /usr/bin/nawk new awk script text executable -!:mime text/x-nawk -0 string/wt #!\ /usr/local/bin/nawk new awk script text executable -!:mime text/x-nawk -0 string/wt #!\ /bin/gawk GNU awk script text executable -!:mime text/x-gawk -0 string/wt #!\ /usr/bin/gawk GNU awk script text executable -!:mime text/x-gawk -0 string/wt #!\ /usr/local/bin/gawk GNU awk script text executable -!:mime text/x-gawk -# -0 string/wt #!\ /bin/awk awk script text executable -!:mime text/x-awk -0 string/wt #!\ /usr/bin/awk awk script text executable -!:mime text/x-awk - -# bash shell magic, from Peter Tobias (tobias@server.et-inf.fho-emden.de) -0 string/wt #!\ /bin/bash Bourne-Again shell script text executable -!:mime text/x-shellscript -0 string/wt #!\ /usr/bin/bash Bourne-Again shell script text executable -!:mime text/x-shellscript -0 string/wt #!\ /usr/local/bash Bourne-Again shell script text executable -!:mime text/x-shellscript -0 string/wt #!\ /usr/local/bin/bash Bourne-Again shell script text executable -!:mime text/x-shellscript - -# PHP scripts -# Ulf Harnhammar -0 search/1/c = -0 string =24 regex [0-9.]+ \b, version %s -!:mime text/x-php diff --git a/magic/compress b/magic/compress deleted file mode 100644 index f2598b783f..0000000000 --- a/magic/compress +++ /dev/null @@ -1,77 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. 
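Stepping back to the cafebabe note a little earlier: Mach-O universal binaries and Java class files share the 0xcafebabe magic, and the suggested tie-breaker is the big-endian word at offset 4 (architecture count for Mach-O, minor/major version shorts for class files). A rough Python sketch of that heuristic, illustrative only; the cutoff of 30 is just an arbitrary value inside the 18-39 window the comment describes:

    import struct

    # Sketch of the cafebabe tie-breaker described above: Mach-O fat binaries
    # keep a small architecture count at offset 4, while Java class files keep
    # minor/major version shorts there.
    CUTOFF = 30  # any value between 18 and 39 works, per the comment above

    def classify_cafebabe(path):
        with open(path, "rb") as f:
            header = f.read(8)
        if len(header) < 8 or header[:4] != b"\xca\xfe\xba\xbe":
            return None
        word = struct.unpack(">I", header[4:8])[0]
        if word < CUTOFF:
            return "Mach-O universal binary (%d architectures)" % word
        minor, major = struct.unpack(">HH", header[4:8])
        return "Java class data, version %d.%d" % (major, minor)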
-#------------------------------------------------------------------------------ -# $File: compress,v 1.48 2011/12/07 18:39:43 christos Exp $ -# compress: file(1) magic for pure-compression formats (no archives) -# -# compress, gzip, pack, compact, huf, squeeze, crunch, freeze, yabba, etc. -# -# Formats for various forms of compressed data -# Formats for "compress" proper have been moved into "compress.c", -# because it tries to uncompress it to figure out what's inside. - -# standard unix compress -0 string \037\235 compress'd data -!:mime application/x-compress -!:apple LZIVZIVU - -# gzip (GNU zip, not to be confused with Info-ZIP or PKWARE zip archiver) -# Edited by Chris Chittleborough , March 2002 -# * Original filename is only at offset 10 if "extra field" absent -# * Produce shorter output - notably, only report compression methods -# other than 8 ("deflate", the only method defined in RFC 1952). -0 string \037\213 gzip compressed data -!:mime application/x-gzip - -# packed data, Huffman (minimum redundancy) codes on a byte-by-byte basis -0 string \037\036 packed data -!:mime application/octet-stream - -# -# This magic number is byte-order-independent. -0 short 0x1f1f old packed data -!:mime application/octet-stream - -# XXX - why *two* entries for "compacted data", one of which is -# byte-order independent, and one of which is byte-order dependent? -# -0 short 0x1fff compacted data -!:mime application/octet-stream -# This string is valid for SunOS (BE) and a matching "short" is listed -# in the Ultrix (LE) magic file. -0 string \377\037 compacted data -!:mime application/octet-stream -0 short 0145405 huf output -!:mime application/octet-stream - -# bzip2 -0 string BZh bzip2 compressed data -!:mime application/x-bzip2 - -# lzip -0 string LZIP lzip compressed data -!:mime application/x-lzip - -# 7-zip archiver, from Thomas Klausner (wiz@danbala.tuwien.ac.at) -# http://www.7-zip.org or DOC/7zFormat.txt -# -0 string 7z\274\257\047\034 7-zip archive data, ->6 byte x version %d ->7 byte x \b.%d -!:mime application/x-7z-compressed - -# Type: LZMA -0 lelong&0xffffff =0x5d ->12 leshort =0xff LZMA compressed data, ->>5 lequad =0xffffffffffffffff streamed ->>5 lequad !0xffffffffffffffff non-streamed, size %lld -!:mime application/x-lzma - -# http://tukaani.org/xz/xz-file-format.txt -0 ustring \xFD7zXZ\x00 XZ compressed data -!:mime application/x-xz - -# https://github.com/ckolivas/lrzip/blob/master/doc/magic.header.txt -0 string LRZI LRZIP compressed data ->4 byte x - version %d ->5 byte x \b.%d -!:mime application/x-lrzip diff --git a/magic/database b/magic/database deleted file mode 100644 index f1c09c0629..0000000000 --- a/magic/database +++ /dev/null @@ -1,47 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File: database,v 1.32 2013/02/06 14:18:52 christos Exp $ -# database: file(1) magic for various databases -# -# extracted from header/code files by Graeme Wilford (eep2gw@ee.surrey.ac.uk) -# -# -# GDBM magic numbers -# Will be maintained as part of the GDBM distribution in the future. -# -0 belong 0x13579ace GNU dbm 1.x or ndbm database, big endian -!:mime application/x-gdbm -0 lelong 0x13579ace GNU dbm 1.x or ndbm database, little endian -!:mime application/x-gdbm -0 string GDBM GNU dbm 2.x database -!:mime application/x-gdbm -# -# Berkeley DB -# -# Ian Darwin's file /etc/magic files: big/little-endian version. 
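As an aside on the compression entries just above (not part of the deleted files): nearly all of them are fixed byte prefixes at offset 0, so a crude sniffer can mirror them directly. A minimal sketch, with the prefixes copied from the signatures listed above:

    # Sketch: map the fixed leading bytes from the compression entries above
    # to their MIME types; longer prefixes are tried first.
    COMPRESSION_PREFIXES = [
        (b"\xfd7zXZ\x00", "application/x-xz"),
        (b"7z\xbc\xaf\x27\x1c", "application/x-7z-compressed"),
        (b"\x1f\x8b", "application/x-gzip"),
        (b"\x1f\x9d", "application/x-compress"),
        (b"BZh", "application/x-bzip2"),
        (b"LZIP", "application/x-lzip"),
        (b"LRZI", "application/x-lrzip"),
    ]

    def sniff_compression(path):
        with open(path, "rb") as f:
            head = f.read(8)
        for prefix, mime in COMPRESSION_PREFIXES:
            if head.startswith(prefix):
                return mime
        return None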
-# -# Hash 1.85/1.86 databases store metadata in network byte order. -# Btree 1.85/1.86 databases store the metadata in host byte order. -# Hash and Btree 2.X and later databases store the metadata in host byte order. - -0 long 0x00061561 Berkeley DB -!:mime application/x-dbm - -# MS Access database -4 string Standard\ Jet\ DB Microsoft Access Database -!:mime application/x-msaccess -4 string Standard\ ACE\ DB Microsoft Access Database -!:mime application/x-msaccess - -# Tokyo Cabinet magic data -# http://tokyocabinet.sourceforge.net/index.html -0 string ToKyO\ CaBiNeT\n Tokyo Cabinet ->14 string x \b (%s) ->32 byte 0 \b, Hash -!:mime application/x-tokyocabinet-hash ->32 byte 1 \b, B+ tree -!:mime application/x-tokyocabinet-btree ->32 byte 2 \b, Fixed-length -!:mime application/x-tokyocabinet-fixed ->32 byte 3 \b, Table -!:mime application/x-tokyocabinet-table diff --git a/magic/diff b/magic/diff deleted file mode 100644 index b6504f17a0..0000000000 --- a/magic/diff +++ /dev/null @@ -1,25 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File: diff,v 1.13 2012/06/16 14:43:36 christos Exp $ -# diff: file(1) magic for diff(1) output -# -0 search/1 diff\ diff output text -!:mime text/x-diff -0 search/1 ***\ diff output text -!:mime text/x-diff -0 search/1 Only\ in\ diff output text -!:mime text/x-diff -0 search/1 Common\ subdirectories:\ diff output text -!:mime text/x-diff - -0 search/1 Index: RCS/CVS diff output text -!:mime text/x-diff - -# unified diff -0 search/4096 ---\ ->&0 search/1024 \n ->>&0 search/1 +++\ ->>>&0 search/1024 \n ->>>>&0 search/1 @@ unified diff output text -!:mime text/x-diff -!:strength + 90 diff --git a/magic/elf b/magic/elf deleted file mode 100644 index aaf80cf10e..0000000000 --- a/magic/elf +++ /dev/null @@ -1,43 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# elf: file(1) magic for ELF executables -# -# We have to check the byte order flag to see what byte order all the -# other stuff in the header is in. -# -# What're the correct byte orders for the nCUBE and the Fujitsu VPP500? -# -# Created by: unknown -# Modified by (1): Daniel Quinlan -# Modified by (2): Peter Tobias (core support) -# Modified by (3): Christian 'Dr. Disk' Hechelmann (fix of core support) -# Modified by (4): (VMS Itanium) -# Modified by (5): Matthias Urlichs (Listing of many architectures) -0 string \177ELF ELF ->4 byte 0 invalid class ->4 byte 1 32-bit ->4 byte 2 64-bit ->5 byte 0 invalid byte order ->5 byte 1 LSB ->>16 leshort 0 no file type, -!:strength *2 -!:mime application/octet-stream ->>16 leshort 1 relocatable, -!:mime application/x-object ->>16 leshort 2 executable, -!:mime application/x-executable ->>16 leshort 3 shared object, -!:mime application/x-sharedlib ->>16 leshort 4 core file -!:mime application/x-coredump ->5 byte 2 MSB ->>16 beshort 0 no file type, -!:mime application/octet-stream ->>16 beshort 1 relocatable, -!:mime application/x-object ->>16 beshort 2 executable, -!:mime application/x-executable ->>16 beshort 3 shared object, -!:mime application/x-sharedlib ->>16 beshort 4 core file, -!:mime application/x-coredump diff --git a/magic/epoc b/magic/epoc deleted file mode 100644 index d7397145fb..0000000000 --- a/magic/epoc +++ /dev/null @@ -1,34 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. 
-#------------------------------------------------------------------------------ -# $File: epoc,v 1.7 2009/09/19 16:28:09 christos Exp $ -# EPOC : file(1) magic for EPOC documents [Psion Series 5/Osaris/Geofox 1] -# Stefan Praszalowicz and Peter Breitenlohner -# Useful information for improving this file can be found at: -# http://software.frodo.looijaard.name/psiconv/formats/Index.html -#------------------------------------------------------------------------------ -0 lelong 0x10000037 Psion Series 5 ->4 lelong 0x10000042 multi-bitmap image -!:mime image/x-epoc-mbm ->4 lelong 0x1000006D ->>8 lelong 0x1000007D Sketch image -!:mime image/x-epoc-sketch ->>8 lelong 0x1000007F Word file -!:mime application/x-epoc-word ->>8 lelong 0x10000085 OPL program (TextEd) -!:mime application/x-epoc-opl ->>8 lelong 0x10000088 Sheet file -!:mime application/x-epoc-sheet ->4 lelong 0x10000073 OPO module -!:mime application/x-epoc-opo ->4 lelong 0x10000074 OPL application -!:mime application/x-epoc-app - - -0 lelong 0x10000050 Psion Series 5 ->4 lelong 0x1000006D database ->>8 lelong 0x10000084 Agenda file -!:mime application/x-epoc-agenda ->>8 lelong 0x10000086 Data file -!:mime application/x-epoc-data ->>8 lelong 0x10000CEA Jotter file -!:mime application/x-epoc-jotter diff --git a/magic/filesystems b/magic/filesystems deleted file mode 100644 index d2178296e0..0000000000 --- a/magic/filesystems +++ /dev/null @@ -1,12 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File: filesystems,v 1.76 2013/02/18 18:45:41 christos Exp $ -# filesystems: file(1) magic for different filesystems -# - -# CDROM Filesystems -# Modified for UDF by gerardo.cacciari@gmail.com -32769 string CD001 # -!:mime application/x-iso9660-image -37633 string CD001 ISO 9660 CD-ROM filesystem data (raw 2352 byte sectors) -!:mime application/x-iso9660-image diff --git a/magic/flash b/magic/flash deleted file mode 100644 index b64761b12d..0000000000 --- a/magic/flash +++ /dev/null @@ -1,18 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File: flash,v 1.8 2009/09/19 16:28:09 christos Exp $ -# flash: file(1) magic for Macromedia Flash file format -# -# See -# -# http://www.macromedia.com/software/flash/open/ -# -0 string FWS Macromedia Flash data, ->3 byte x version %d -!:mime application/x-shockwave-flash -0 string CWS Macromedia Flash data (compressed), -!:mime application/x-shockwave-flash - -# From: Cal Peake -0 string FLV Macromedia Flash Video -!:mime video/x-flv diff --git a/magic/fonts b/magic/fonts deleted file mode 100644 index 8189131d15..0000000000 --- a/magic/fonts +++ /dev/null @@ -1,32 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. 
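A short illustrative aside on the two ISO 9660 entries a little further up (not part of the deleted files): both probe for the same "CD001" tag, once at offset 32769 and once at offset 37633 for raw 2352-byte sectors. A minimal sketch that mirrors those two fixed offsets:

    # Sketch: the ISO 9660 entries above look for "CD001" at two fixed offsets.
    ISO_OFFSETS = {
        32769: "application/x-iso9660-image",
        37633: "application/x-iso9660-image (raw 2352 byte sectors)",
    }

    def looks_like_iso9660(path):
        with open(path, "rb") as f:
            for offset, label in ISO_OFFSETS.items():
                f.seek(offset)
                if f.read(5) == b"CD001":
                    return label
        return None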
-#------------------------------------------------------------------------------ -# $File: fonts,v 1.25 2013/02/06 14:18:52 christos Exp $ -# fonts: file(1) magic for font data -# - -# X11 font files in SNF (Server Natural Format) format -# updated by Joerg Jenderek at Feb 2013 -# http://computer-programming-forum.com/51-perl/8f22fb96d2e34bab.htm -0 belong 00000004 X11 SNF font data, MSB first -#>104 belong 00000004 X11 SNF font data, MSB first -!:mime application/x-font-sfn -# GRR: line below too general as it catches also Xbase index file t3-CHAR.NDX -0 lelong 00000004 ->104 lelong 00000004 X11 SNF font data, LSB first -!:mime application/x-font-sfn - -# True Type fonts -0 string \000\001\000\000\000 TrueType font data -!:mime application/x-font-ttf - -# Opentype font data from Avi Bercovich -0 string OTTO OpenType font data -!:mime application/vnd.ms-opentype - -# Gurkan Sengun , www.linuks.mine.nu -0 string SplineFontDB: Spline Font Database -!:mime application/vnd.font-fontforge-sfd - -# EOT -34 string LP Embedded OpenType (EOT) -!:mime application/vnd.ms-fontobject diff --git a/magic/fortran b/magic/fortran deleted file mode 100644 index 498eeacf8a..0000000000 --- a/magic/fortran +++ /dev/null @@ -1,7 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File: fortran,v 1.6 2009/09/19 16:28:09 christos Exp $ -# FORTRAN source -0 regex/100 \^[Cc][\ \t] FORTRAN program -!:mime text/x-fortran -!:strength - 5 diff --git a/magic/frame b/magic/frame deleted file mode 100644 index b42943bfcd..0000000000 --- a/magic/frame +++ /dev/null @@ -1,31 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File$ -# frame: file(1) magic for FrameMaker files -# -# This stuff came on a FrameMaker demo tape, most of which is -# copyright, but this file is "published" as witness the following: -# -# Note that this is the Framemaker Maker Interchange Format, not the -# Normal format which would be application/vnd.framemaker. -# -0 string \6 string 3.0 (3.0) -#>6 string 2.0 (2.0) -#>6 string 1.0 (1.0) -0 string \ - -#------------------------------------------------------------------------------ -# XCF: file(1) magic for the XCF image format used in the GIMP developed -# by Spencer Kimball and Peter Mattis -# ('Bucky' LaDieu, nega@vt.edu) - -0 string gimp\ xcf GIMP XCF image data, -!:mime image/x-xcf diff --git a/magic/gnu b/magic/gnu deleted file mode 100644 index bf1f631751..0000000000 --- a/magic/gnu +++ /dev/null @@ -1,23 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File: gnu,v 1.13 2012/01/03 17:16:54 christos Exp $ -# gnu: file(1) magic for various GNU tools -# -# GNU nlsutils message catalog file format -# -# GNU message catalog (.mo and .gmo files) - -# GnuPG -# The format is very similar to pgp -# Note: magic.mime had 0x8501 for the next line instead of 0x8502 -0 beshort 0x8502 GPG encrypted data -!:mime text/PGP # encoding: data - -# This magic is not particularly good, as the keyrings don't have true -# magic. Nevertheless, it covers many keyrings. 
-0 beshort 0x9901 GPG key public ring -!:mime application/x-gnupg-keyring - -# gettext message catalogue -0 regex \^msgid\ GNU gettext message catalogue text -!:mime text/x-po diff --git a/magic/gnumeric b/magic/gnumeric deleted file mode 100644 index b5edca93c1..0000000000 --- a/magic/gnumeric +++ /dev/null @@ -1,8 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File$ -# gnumeric: file(1) magic for Gnumeric spreadsheet -# This entry is only semi-helpful, as Gnumeric compresses its files, so -# they will ordinarily reported as "compressed", but at least -z helps -39 string =4 belong x \b, FORM is %d bytes long -# audio formats ->8 string AIFF \b, AIFF audio -!:mime audio/x-aiff ->8 string AIFC \b, AIFF-C compressed audio -!:mime audio/x-aiff ->8 string 8SVX \b, 8SVX 8-bit sampled sound voice -!:mime audio/x-aiff diff --git a/magic/images b/magic/images deleted file mode 100644 index 281aba4706..0000000000 --- a/magic/images +++ /dev/null @@ -1,255 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File: images,v 1.80 2013/02/06 14:18:52 christos Exp $ -# images: file(1) magic for image formats (see also "iff", and "c-lang" for -# XPM bitmaps) -# -# originally from jef@helios.ee.lbl.gov (Jef Poskanzer), -# additions by janl@ifi.uio.no as well as others. Jan also suggested -# merging several one- and two-line files into here. -# -# little magic: PCX (first byte is 0x0a) - -# PBMPLUS images -# The next byte following the magic is always whitespace. -# strength is changed to try these patterns before "x86 boot sector" -0 search/1 P1 ->3 regex =[0-9]*\ [0-9]* Netpbm PBM image text ->3 regex =[0-9]+\ \b, size = %sx ->>3 regex =\ [0-9]+ \b%s -!:strength + 45 -!:mime image/x-portable-bitmap -0 search/1 P2 ->3 regex =[0-9]*\ [0-9]* Netpbm PGM image text ->3 regex =[0-9]+\ \b, size = %sx ->>3 regex =\ [0-9]+ \b%s -!:strength + 45 -!:mime image/x-portable-greymap -0 search/1 P3 Netpbm PPM image text ->3 regex =[0-9]*\ [0-9]* Netpbm PPM image text ->3 regex =[0-9]+\ \b, size = %sx ->>3 regex =\ [0-9]+ \b%s -!:strength + 45 -!:mime image/x-portable-pixmap -0 string P4 ->3 regex =[0-9]*\ [0-9]* Netpbm PBM "rawbits" image data ->3 regex =[0-9]+\ \b, size = %sx ->>3 regex =\ [0-9]+ \b%s -!:strength + 45 -!:mime image/x-portable-bitmap -0 string P5 ->3 regex =[0-9]*\ [0-9]* Netpbm PGM "rawbits" image data ->3 regex =[0-9]+\ \b, size = %sx ->>3 regex =\ [0-9]+ \b%s -!:strength + 45 -!:mime image/x-portable-greymap -0 string P6 ->3 regex =[0-9]*\ [0-9]* Netpbm PPM "rawbits" image data ->3 regex =[0-9]+\ \b, size = %sx ->>3 regex =\ [0-9]+ \b%s -!:strength + 45 -!:mime image/x-portable-pixmap -0 string P7 Netpbm PAM image file -!:mime image/x-portable-pixmap - -# NIFF (Navy Interchange File Format, a modification of TIFF) images -# [GRR: this *must* go before TIFF] -0 string IIN1 NIFF image data -!:mime image/x-niff - -# Canon RAW version 1 (CRW) files are a type of Canon Image File Format -# (CIFF) file. These are apparently all little-endian. -# From: Adam Buchbinder -# URL: http://www.sno.phy.queensu.ca/~phil/exiftool/canon_raw.html -0 string II\x1a\0\0\0HEAPCCDR Canon CIFF raw image data -!:mime image/x-canon-crw - -# Canon RAW version 2 (CR2) files are a kind of TIFF with an extra magic -# number. Put this above the TIFF test to make sure we detect them. 
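The CR2 remark just above ("put this above the TIFF test") and the !:strength bumps on the Netpbm entries make the same point: when several patterns can match, the more specific one must be tried first. In a hand-rolled sniffer that usually just means a first-match-wins list ordered by specificity; a rough sketch (illustrative only, signatures copied from the CR2/CRW/TIFF entries around this point):

    # Sketch: first-match-wins, most specific signature first, mirroring the
    # "put this above the TIFF test" ordering note above.
    SIGNATURES = [
        (b"II\x2a\x00\x10\x00\x00\x00CR", "image/x-canon-cr2"),  # CR2: TIFF + extra magic
        (b"II\x1a\x00\x00\x00HEAPCCDR",   "image/x-canon-crw"),  # Canon CIFF/CRW
        (b"II\x2a\x00",                   "image/tiff"),         # little-endian TIFF
        (b"MM\x00\x2a",                   "image/tiff"),         # big-endian TIFF
    ]

    def sniff_image(path):
        with open(path, "rb") as f:
            head = f.read(16)
        for prefix, mime in SIGNATURES:
            if head.startswith(prefix):
                return mime
        return None

Reversing the first and third entries would make every CR2 file report as plain TIFF, which is exactly the failure the ordering note guards against.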
-# These are apparently all little-endian. -# From: Adam Buchbinder -# URL: http://libopenraw.freedesktop.org/wiki/Canon_CR2 -0 string II\x2a\0\x10\0\0\0CR Canon CR2 raw image data -!:mime image/x-canon-cr2 - -# Tag Image File Format, from Daniel Quinlan (quinlan@yggdrasil.com) -# The second word of TIFF files is the TIFF version number, 42, which has -# never changed. The TIFF specification recommends testing for it. -0 string MM\x00\x2a TIFF image data, big-endian -!:mime image/tiff -0 string II\x2a\x00 TIFF image data, little-endian -!:mime image/tiff - -0 string MM\x00\x2b Big TIFF image data, big-endian -!:mime image/tiff -0 string II\x2b\x00 Big TIFF image data, little-endian -!:mime image/tiff - -# PNG [Portable Network Graphics, or "PNG's Not GIF"] images -# (Greg Roelofs, newt@uchicago.edu) -# (Albert Cahalan, acahalan@cs.uml.edu) -# -# 137 P N G \r \n ^Z \n [4-byte length] H E A D [HEAD data] [HEAD crc] ... -# -0 string \x89PNG\x0d\x0a\x1a\x0a PNG image data -!:mime image/png - -# possible GIF replacements; none yet released! -# (Greg Roelofs, newt@uchicago.edu) -# -# GRR 950115: this was mine ("Zip GIF"): -0 string GIF94z ZIF image (GIF+deflate alpha) -!:mime image/x-unknown -# -# GRR 950115: this is Jeremy Wohl's Free Graphics Format (better): -# -0 string FGF95a FGF image (GIF+deflate beta) -!:mime image/x-unknown -# -# GRR 950115: this is Thomas Boutell's Portable Bitmap Format proposal -# (best; not yet implemented): -# -0 string PBF PBF image (deflate compression) -!:mime image/x-unknown - -# GIF -0 string GIF8 GIF image data -!:mime image/gif -!:apple 8BIMGIFf - -# From: Joerg Jenderek -# most files with the extension .EPA and some with .BMP -0 string \x11\x06 Award BIOS Logo, 136 x 84 -!:mime image/x-award-bioslogo -0 string \x11\x09 Award BIOS Logo, 136 x 126 -!:mime image/x-award-bioslogo -#0 string \x07\x1f BIOS Logo corrupted? -# http://www.blackfiveservices.co.uk/awbmtools.shtml -# http://biosgfx.narod.ru/v3/ -# http://biosgfx.narod.ru/abr-2/ -0 string AWBM ->4 leshort <1981 Award BIOS bitmap -!:mime image/x-award-bmp - -# PC bitmaps (OS/2, Windows BMP files) (Greg Roelofs, newt@uchicago.edu) -0 string BM ->14 leshort 12 PC bitmap, OS/2 1.x format -!:mime image/x-ms-bmp ->14 leshort 64 PC bitmap, OS/2 2.x format -!:mime image/x-ms-bmp ->14 leshort 40 PC bitmap, Windows 3.x format -!:mime image/x-ms-bmp ->14 leshort 128 PC bitmap, Windows NT/2000 format -!:mime image/x-ms-bmp - -# XPM icons (Greg Roelofs, newt@uchicago.edu) -0 search/1 /*\ XPM\ */ X pixmap image text -!:mime image/x-xpmi - -# DICOM medical imaging data -128 string DICM DICOM medical imaging data -!:mime application/dicom - -# XWD - X Window Dump file. -# As described in /usr/X11R6/include/X11/XWDFile.h -# used by the xwd program. -# Bradford Castalia, idaeim, 1/01 -# updated by Adam Buchbinder, 2/09 -# The following assumes version 7 of the format; the first long is the length -# of the header, which is at least 25 4-byte longs, and the one at offset 8 -# is a constant which is always either 1 or 2. Offset 12 is the pixmap depth, -# which is a maximum of 32. 
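Since XWD has no real magic number, the entry that follows ANDs together the plausibility checks just described on the first four big-endian header words. The same conjunction as a rough Python sketch (illustrative only, not part of the deleted file):

    import struct

    def looks_like_xwd(path):
        """Mirror the XWD plausibility test described above: header length > 100,
        file version == 7, pixmap format < 3, pixmap depth < 33 (big-endian words)."""
        with open(path, "rb") as f:
            head = f.read(16)
        if len(head) < 16:
            return False
        header_size, file_version, pixmap_format, pixmap_depth = struct.unpack(">4I", head)
        return (header_size > 100 and file_version == 7
                and pixmap_format < 3 and pixmap_depth < 33)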
-0 belong >100 ->8 belong <3 ->>12 belong <33 ->>>4 belong 7 XWD X Window Dump image data -!:mime image/x-xwindowdump - -# PCX image files -# From: Dan Fandrich -# updated by Joerg Jenderek at Feb 2013 by http://de.wikipedia.org/wiki/PCX -# http://web.archive.org/web/20100206055706/http://www.qzx.com/pc-gpe/pcx.txt -# GRR: original test was still too general as it catches xbase examples T5.DBT,T6.DBT with 0xa000000 -# test for bytes 0x0a,version byte (0,2,3,4,5),compression byte flag(0,1), bit depth (>0) of PCX or T5.DBT,T6.DBT -0 ubelong&0xffF8fe00 0x0a000000 -# for PCX bit depth > 0 ->3 ubyte >0 -# test for valid versions ->>1 ubyte <6 ->>>1 ubyte !1 PCX -!:mime image/x-pcx - -# Adobe Photoshop -# From: Asbjoern Sloth Toennesen -0 string 8BPS Adobe Photoshop Image -!:mime image/vnd.adobe.photoshop - -# Summary: DjVu image / document -# Extension: .djvu -# Reference: http://djvu.org/docs/DjVu3Spec.djvu -# Submitted by: Stephane Loeuillet -# Modified by (1): Abel Cheung -0 string AT&TFORM ->12 string DJVM DjVu multiple page document -!:mime image/vnd.djvu ->12 string DJVU DjVu image or single page document -!:mime image/vnd.djvu ->12 string DJVI DjVu shared document -!:mime image/vnd.djvu ->12 string THUM DjVu page thumbnails -!:mime image/vnd.djvu - -# Originally by Marc Espie -# Modified by Robert Minsk -# http://www.openexr.com/openexrfilelayout.pdf -0 lelong 20000630 OpenEXR image data, -!:mime image/x-exr - -# SMPTE Digital Picture Exchange Format, SMPTE DPX -# -# ANSI/SMPTE 268M-1994, SMPTE Standard for File Format for Digital -# Moving-Picture Exchange (DPX), v1.0, 18 February 1994 -# Robert Minsk -0 string SDPX DPX image data, big-endian, -!:mime image/x-dpx - -#----------------------------------------------------------------------- -# Hierarchical Data Format, used to facilitate scientific data exchange -# specifications at http://hdf.ncsa.uiuc.edu/ -0 belong 0x0e031301 Hierarchical Data Format (version 4) data -!:mime application/x-hdf -0 string \211HDF\r\n\032\n Hierarchical Data Format (version 5) data -!:mime application/x-hdf - -# http://www.cartesianinc.com/Tech/ -0 string CPC\262 Cartesian Perceptual Compression image -!:mime image/x-cpi - - -# Polar Monitor Bitmap (.pmb) used as logo for Polar Electro watches -# From: Markus Heidelberg -0 string/t [BitmapInfo2] Polar Monitor Bitmap text -!:mime image/x-polar-monitor-bitmap - -# Type: Olympus ORF raw images. -# URL: http://libopenraw.freedesktop.org/wiki/Olympus_ORF -# From: Adam Buchbinder -0 string MMOR Olympus ORF raw image data, big-endian -!:mime image/x-olympus-orf -0 string IIRO Olympus ORF raw image data, little-endian -!:mime image/x-olympus-orf -0 string IIRS Olympus ORF raw image data, little-endian -!:mime image/x-olympus-orf - -# Type: Foveon X3F -# URL: http://www.photofo.com/downloads/x3f-raw-format.pdf -# From: Adam Buchbinder -# Note that the MIME type isn't defined anywhere that I can find; if -# there's a canonical type for this format, it should replace this one. -0 string FOVb Foveon X3F raw image data -!:mime image/x-x3f - -# Paint.NET file -# From Adam Buchbinder -0 string PDN3 Paint.NET image data -!:mime image/x-paintnet diff --git a/magic/java b/magic/java deleted file mode 100644 index 481ffec160..0000000000 --- a/magic/java +++ /dev/null @@ -1,16 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. 
-#------------------------------------------------------------ -# $File: java,v 1.13 2011/12/08 12:12:46 rrt Exp $ -# Java ByteCode and Mach-O binaries (e.g., Mac OS X) use the -# same magic number, 0xcafebabe, so they are both handled -# in the entry called "cafebabe". -#------------------------------------------------------------ - -0 belong 0xfeedfeed Java KeyStore -!:mime application/x-java-keystore -0 belong 0xcececece Java JCE KeyStore -!:mime application/x-java-jce-keystore - -# Java source -0 regex ^import.*;$ Java source -!:mime text/x-java diff --git a/magic/javascript b/magic/javascript deleted file mode 100644 index a1311d0e71..0000000000 --- a/magic/javascript +++ /dev/null @@ -1,17 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File: $ -# javascript: magic for javascript and node.js scripts. -# -0 search/1/w #!/bin/node Node.js script text executable -!:mime application/javascript -0 search/1/w #!/usr/bin/node Node.js script text executable -!:mime application/javascript -0 search/1/w #!/bin/nodejs Node.js script text executable -!:mime application/javascript -0 search/1/w #!/usr/bin/nodejs Node.js script text executable -!:mime application/javascript -0 search/1 #!/usr/bin/env\ node Node.js script text executable -!:mime application/javascript -0 search/1 #!/usr/bin/env\ nodejs Node.js script text executable -!:mime application/javascript diff --git a/magic/jpeg b/magic/jpeg deleted file mode 100644 index 55fedae4b4..0000000000 --- a/magic/jpeg +++ /dev/null @@ -1,31 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File: jpeg,v 1.18 2012/08/01 12:12:36 christos Exp $ -# JPEG images -# SunOS 5.5.1 had -# -# 0 string \377\330\377\340 JPEG file -# 0 string \377\330\377\356 JPG file -# -# both of which turn into "JPEG image data" here. -# -0 beshort 0xffd8 JPEG image data -!:mime image/jpeg -!:apple 8BIMJPEG -!:strength +2 - -# From: David Santinoli -0 string \x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A JPEG 2000 -# From: Johan van der Knijff -# Added sub-entries for JP2, JPX, JPM and MJ2 formats; added mimetypes -# https://github.com/bitsgalore/jp2kMagic -# -# Now read value of 'Brand' field, which yields a few possibilities: ->20 string \x6a\x70\x32\x20 Part 1 (JP2) -!:mime image/jp2 ->20 string \x6a\x70\x78\x20 Part 2 (JPX) -!:mime image/jpx ->20 string \x6a\x70\x6d\x20 Part 6 (JPM) -!:mime image/jpm ->20 string \x6d\x6a\x70\x32 Part 3 (MJ2) -!:mime video/mj2 diff --git a/magic/kde b/magic/kde deleted file mode 100644 index 2b66ee611d..0000000000 --- a/magic/kde +++ /dev/null @@ -1,11 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File: kde,v 1.4 2009/09/19 16:28:10 christos Exp $ -# kde: file(1) magic for KDE - -0 string/t [KDE\ Desktop\ Entry] KDE desktop entry -!:mime application/x-kdelnk -0 string/t #\ KDE\ Config\ File KDE config file -!:mime application/x-kdelnk -0 string/t #\ xmcd xmcd database file for kscd -!:mime text/x-xmcd diff --git a/magic/kml b/magic/kml deleted file mode 100644 index 608ff0e1b0..0000000000 --- a/magic/kml +++ /dev/null @@ -1,30 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. 
-#------------------------------------------------------------------------------ -# $File: kml,v 1.2 2009/09/19 16:28:10 christos Exp $ -# Type: Google KML, formerly Keyhole Markup Language -# Future development of this format has been handed -# over to the Open Geospatial Consortium. -# http://www.opengeospatial.org/standards/kml/ -# From: Asbjoern Sloth Toennesen -0 string/t \20 search/400 \ xmlns= ->>&0 regex ['"]http://earth.google.com/kml Google KML document -!:mime application/vnd.google-earth.kml+xml - -#------------------------------------------------------------------------------ -# Type: OpenGIS KML, formerly Keyhole Markup Language -# This standard is maintained by the -# Open Geospatial Consortium. -# http://www.opengeospatial.org/standards/kml/ -# From: Asbjoern Sloth Toennesen ->>&0 regex ['"]http://www.opengis.net/kml OpenGIS KML document -!:mime application/vnd.google-earth.kml+xml - -#------------------------------------------------------------------------------ -# Type: Google KML Archive (ZIP based) -# http://code.google.com/apis/kml/documentation/kml_tut.html -# From: Asbjoern Sloth Toennesen -0 string PK\003\004 ->4 byte 0x14 ->>30 string doc.kml Compressed Google KML Document, including resources. -!:mime application/vnd.google-earth.kmz diff --git a/magic/linux b/magic/linux deleted file mode 100644 index 4a5c935760..0000000000 --- a/magic/linux +++ /dev/null @@ -1,22 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File: linux,v 1.46 2013/01/06 21:26:48 christos Exp $ -# linux: file(1) magic for Linux files -# -# Values for Linux/i386 binaries, from Daniel Quinlan -# The following basic Linux magic is useful for reference, but using -# "long" magic is a better practice in order to avoid collisions. -# -# 2 leshort 100 Linux/i386 -# >0 leshort 0407 impure executable (OMAGIC) -# >0 leshort 0410 pure executable (NMAGIC) -# >0 leshort 0413 demand-paged executable (ZMAGIC) -# >0 leshort 0314 demand-paged executable (QMAGIC) -# - -# SYSLINUX boot logo files (from 'ppmtolss16' sources) -# http://www.syslinux.org/wiki/index.php/SYSLINUX#Display_graphic_from_filename: -# file extension .lss .16 -0 lelong =0x1413f33d SYSLINUX' LSS16 image data -# syslinux-4.05/mime/image/x-lss16.xml -!:mime image/x-lss16 diff --git a/magic/lisp b/magic/lisp deleted file mode 100644 index f5a06c8964..0000000000 --- a/magic/lisp +++ /dev/null @@ -1,42 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. 
-#------------------------------------------------------------------------------ -# $File$ -# lisp: file(1) magic for lisp programs -# -# various lisp types, from Daniel Quinlan (quinlan@yggdrasil.com) - -# updated by Joerg Jenderek -# GRR: This lot is too weak -#0 string ;; -# windows INF files often begin with semicolon and use CRLF as line end -# lisp files are mainly created on unix system with LF as line end -#>2 search/4096 !\r Lisp/Scheme program text -#>2 search/4096 \r Windows INF file - -0 search/4096 (setq\ Lisp/Scheme program text -!:mime text/x-lisp -0 search/4096 (defvar\ Lisp/Scheme program text -!:mime text/x-lisp -0 search/4096 (defparam\ Lisp/Scheme program text -!:mime text/x-lisp -0 search/4096 (defun\ Lisp/Scheme program text -!:mime text/x-lisp -0 search/4096 (autoload\ Lisp/Scheme program text -!:mime text/x-lisp -0 search/4096 (custom-set-variables\ Lisp/Scheme program text -!:mime text/x-lisp - -# Emacs 18 - this is always correct, but not very magical. -0 string \012( Emacs v18 byte-compiled Lisp data -!:mime application/x-elc -# Emacs 19+ - ver. recognition added by Ian Springer -# Also applies to XEmacs 19+ .elc files; could tell them apart with regexs -# - Chris Chittleborough -0 string ;ELC ->4 byte >18 ->4 byte <32 Emacs/XEmacs v%d byte-compiled Lisp data -!:mime application/x-elc - -# From: David Allouche -0 search/1 \, Seo Sanghyeon - -# Lua scripts -0 search/1/w #!\ /usr/bin/lua Lua script text executable -!:mime text/x-lua -0 search/1/w #!\ /usr/local/bin/lua Lua script text executable -!:mime text/x-lua -0 search/1 #!/usr/bin/env\ lua Lua script text executable -!:mime text/x-lua -0 search/1 #!\ /usr/bin/env\ lua Lua script text executable -!:mime text/x-lua - diff --git a/magic/m4 b/magic/m4 deleted file mode 100644 index 7262fca81b..0000000000 --- a/magic/m4 +++ /dev/null @@ -1,7 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File$ -# make: file(1) magic for M4 scripts -# -0 regex \^dnl\ M4 macro processor script text -!:mime text/x-m4 diff --git a/magic/macintosh b/magic/macintosh deleted file mode 100644 index 6398fc2ff2..0000000000 --- a/magic/macintosh +++ /dev/null @@ -1,21 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File: macintosh,v 1.21 2010/09/20 19:19:17 rrt Exp $ -# macintosh description -# -# BinHex is the Macintosh ASCII-encoded file format (see also "apple") -# Daniel Quinlan, quinlan@yggdrasil.com -11 string must\ be\ converted\ with\ BinHex BinHex binary text -!:mime application/mac-binhex40 - -# Stuffit archives are the de facto standard of compression for Macintosh -# files obtained from most archives. (franklsm@tuns.ca) -0 string SIT! StuffIt Archive (data) -!:mime application/x-stuffit -!:apple SIT!SIT! - -# Newer StuffIt archives (grant@netbsd.org) -0 string StuffIt StuffIt Archive -!:mime application/x-stuffit -!:apple SIT!SIT! -#>162 string >0 : %s diff --git a/magic/mail.news b/magic/mail.news deleted file mode 100644 index c1a446d4ca..0000000000 --- a/magic/mail.news +++ /dev/null @@ -1,35 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. 
-#------------------------------------------------------------------------------ -# $File: mail.news,v 1.21 2012/06/21 01:44:52 christos Exp $ -# mail.news: file(1) magic for mail and news -# -# Unfortunately, saved netnews also has From line added in some news software. -#0 string From mail text -0 string/t Relay-Version: old news text -!:mime message/rfc822 -0 string/t #!\ rnews batched news text -!:mime message/rfc822 -0 string/t N#!\ rnews mailed, batched news text -!:mime message/rfc822 -0 string/t Forward\ to mail forwarding text -!:mime message/rfc822 -0 string/t Pipe\ to mail piping text -!:mime message/rfc822 -0 string/tc delivered-to: SMTP mail text -!:mime message/rfc822 -0 string/tc return-path: SMTP mail text -!:mime message/rfc822 -0 string/t Path: news text -!:mime message/news -0 string/t Xref: news text -!:mime message/news -0 string/t From: news or mail text -!:mime message/rfc822 -0 string/t Article saved news text -!:mime message/news -0 string/t Received: RFC 822 mail text -!:mime message/rfc822 - -# TNEF files... -0 lelong 0x223E9F78 Transport Neutral Encapsulation Format -!:mime application/vnd.ms-tnef diff --git a/magic/make b/magic/make deleted file mode 100644 index 83d6a012dd..0000000000 --- a/magic/make +++ /dev/null @@ -1,16 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File$ -# make: file(1) magic for makefiles -# -0 regex \^CFLAGS makefile script text -!:mime text/x-makefile -0 regex \^LDFLAGS makefile script text -!:mime text/x-makefile -0 regex \^all: makefile script text -!:mime text/x-makefile -0 regex \^.PRECIOUS makefile script text -!:mime text/x-makefile - -0 regex \^SUBDIRS automake makefile script text -!:mime text/x-makefile diff --git a/magic/marc21 b/magic/marc21 deleted file mode 100644 index 26899d2e70..0000000000 --- a/magic/marc21 +++ /dev/null @@ -1,29 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#-------------------------------------------- -# marc21: file(1) magic for MARC 21 Format -# -# Kevin Ford (kefo@loc.gov) -# -# MARC21 formats are for the representation and communication -# of bibliographic and related information in machine-readable -# form. For more info, see http://www.loc.gov/marc/ - - -# leader position 20-21 must be 45 -20 string 45 - -# leader starts with 5 digits, followed by codes specific to MARC format ->0 regex/1 (^[0-9]{5})[acdnp][^bhlnqsu-z] MARC21 Bibliographic -!:mime application/marc ->0 regex/1 (^[0-9]{5})[acdnosx][z] MARC21 Authority -!:mime application/marc ->0 regex/1 (^[0-9]{5})[cdn][uvxy] MARC21 Holdings -!:mime application/marc -0 regex/1 (^[0-9]{5})[acdn][w] MARC21 Classification -!:mime application/marc ->0 regex/1 (^[0-9]{5})[cdn][q] MARC21 Community -!:mime application/marc - -# leader position 22-23, should be "00" but is it? ->0 regex/1 (^.{21})([^0]{2}) (non-conforming) -!:mime application/marc diff --git a/magic/matroska b/magic/matroska deleted file mode 100644 index c1791413cb..0000000000 --- a/magic/matroska +++ /dev/null @@ -1,17 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. 
-#------------------------------------------------------------------------------ -# $File: matroska,v 1.7 2012/08/26 10:06:15 christos Exp $ -# matroska: file(1) magic for Matroska files -# -# See http://www.matroska.org/ -# - -# EBML id: -0 belong 0x1a45dfa3 -# DocType id: ->4 search/4096 \x42\x82 -# DocType contents: ->>&1 string webm WebM -!:mime video/webm ->>&1 string matroska Matroska data -!:mime video/x-matroska diff --git a/magic/misctools b/magic/misctools deleted file mode 100644 index 35fddaa61a..0000000000 --- a/magic/misctools +++ /dev/null @@ -1,9 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#----------------------------------------------------------------------------- -# $File: misctools,v 1.12 2010/09/29 18:36:49 rrt Exp $ -# misctools: file(1) magic for miscellaneous UNIX tools. -# -0 string/c BEGIN:VCALENDAR vCalendar calendar file -!:mime text/calendar -0 string/c BEGIN:VCARD vCard visiting card -!:mime text/x-vcard diff --git a/magic/msdos b/magic/msdos deleted file mode 100644 index cc411aeeb7..0000000000 --- a/magic/msdos +++ /dev/null @@ -1,369 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File: msdos,v 1.84 2013/02/05 13:55:22 christos Exp $ -# msdos: file(1) magic for MS-DOS files -# - -# .BAT files (Daniel Quinlan, quinlan@yggdrasil.com) -# updated by Joerg Jenderek at Oct 2008,Apr 2011 -0 string/t @ ->1 string/cW \ echo\ off DOS batch file text -!:mime text/x-msdos-batch ->1 string/cW echo\ off DOS batch file text -!:mime text/x-msdos-batch ->1 string/cW rem DOS batch file text -!:mime text/x-msdos-batch ->1 string/cW set\ DOS batch file text -!:mime text/x-msdos-batch - -# Tests for various EXE types. -# -# Many of the compressed formats were extraced from IDARC 1.23 source code. -# -0 string/b MZ DOS MZ -!:mime application/x-dosexec -# All non-DOS EXE extensions have the relocation table more than 0x40 bytes into the file. ->0x18 leshort <0x40 MS-DOS executable -# These traditional tests usually work but not always. When test quality support is -# implemented these can be turned on. -#>>0x18 leshort 0x1c (Borland compiler) -#>>0x18 leshort 0x1e (MS compiler) - -# If the relocation table is 0x40 or more bytes into the file, it's definitely -# not a DOS EXE. ->0x18 leshort >0x3f - -# Maybe it's a PE? 
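Before the entries that answer that question: the "(0x3c.l)" indirection used below simply means "read a 32-bit little-endian offset at 0x3c (e_lfanew) and continue matching there". A minimal Python version of the MZ-to-PE walk described around this point, for illustration only:

    import struct

    def classify_exe(path):
        """Rough MZ -> PE walk mirroring the msdos entries around this point."""
        with open(path, "rb") as f:
            data = f.read()
        if len(data) < 0x40 or data[:2] != b"MZ":
            return None
        # Relocation table inside the old DOS header area => plain MS-DOS executable.
        if struct.unpack_from("<H", data, 0x18)[0] < 0x40:
            return "MS-DOS executable"
        # Otherwise follow e_lfanew at 0x3c -- the "(0x3c.l)" indirection below.
        e_lfanew = struct.unpack_from("<I", data, 0x3c)[0]
        if data[e_lfanew:e_lfanew + 4] != b"PE\x00\x00" or len(data) < e_lfanew + 26:
            return "MS-DOS executable (extended header, but not PE)"
        machine = struct.unpack_from("<H", data, e_lfanew + 4)[0]
        opt_magic = struct.unpack_from("<H", data, e_lfanew + 24)[0]
        kind = {0x10b: "PE32 executable", 0x20b: "PE32+ executable"}.get(opt_magic, "PE image")
        return "%s, machine type 0x%04x" % (kind, machine)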
->>(0x3c.l) string PE\0\0 PE ->>>(0x3c.l+24) leshort 0x010b \b32 executable ->>>(0x3c.l+24) leshort 0x020b \b32+ executable ->>>(0x3c.l+24) leshort 0x0107 ROM image ->>>(0x3c.l+24) default x Unknown PE signature ->>>>&0 leshort x 0x%x ->>>(0x3c.l+22) leshort&0x2000 >0 (DLL) ->>>(0x3c.l+92) leshort 1 (native) ->>>(0x3c.l+92) leshort 2 (GUI) ->>>(0x3c.l+92) leshort 3 (console) ->>>(0x3c.l+92) leshort 7 (POSIX) ->>>(0x3c.l+92) leshort 9 (Windows CE) ->>>(0x3c.l+92) leshort 10 (EFI application) ->>>(0x3c.l+92) leshort 11 (EFI boot service driver) ->>>(0x3c.l+92) leshort 12 (EFI runtime driver) ->>>(0x3c.l+92) leshort 13 (EFI ROM) ->>>(0x3c.l+92) leshort 14 (XBOX) ->>>(0x3c.l+92) leshort 15 (Windows boot application) ->>>(0x3c.l+92) default x (Unknown subsystem ->>>>&0 leshort x 0x%x) ->>>(0x3c.l+4) leshort 0x14c Intel 80386 ->>>(0x3c.l+4) leshort 0x166 MIPS R4000 ->>>(0x3c.l+4) leshort 0x168 MIPS R10000 ->>>(0x3c.l+4) leshort 0x184 Alpha ->>>(0x3c.l+4) leshort 0x1a2 Hitachi SH3 ->>>(0x3c.l+4) leshort 0x1a6 Hitachi SH4 ->>>(0x3c.l+4) leshort 0x1c0 ARM ->>>(0x3c.l+4) leshort 0x1c2 ARM Thumb ->>>(0x3c.l+4) leshort 0x1c4 ARMv7 Thumb ->>>(0x3c.l+4) leshort 0x1f0 PowerPC ->>>(0x3c.l+4) leshort 0x200 Intel Itanium ->>>(0x3c.l+4) leshort 0x266 MIPS16 ->>>(0x3c.l+4) leshort 0x268 Motorola 68000 ->>>(0x3c.l+4) leshort 0x290 PA-RISC ->>>(0x3c.l+4) leshort 0x366 MIPSIV ->>>(0x3c.l+4) leshort 0x466 MIPS16 with FPU ->>>(0x3c.l+4) leshort 0xebc EFI byte code ->>>(0x3c.l+4) leshort 0x8664 x86-64 ->>>(0x3c.l+4) leshort 0xc0ee MSIL ->>>(0x3c.l+4) default x Unknown processor type ->>>>&0 leshort x 0x%x ->>>(0x3c.l+22) leshort&0x0200 >0 (stripped to external PDB) ->>>(0x3c.l+22) leshort&0x1000 >0 system file ->>>(0x3c.l+24) leshort 0x010b ->>>>(0x3c.l+232) lelong >0 Mono/.Net assembly ->>>(0x3c.l+24) leshort 0x020b ->>>>(0x3c.l+248) lelong >0 Mono/.Net assembly - -# hooray, there's a DOS extender using the PE format, with a valid PE -# executable inside (which just prints a message and exits if run in win) ->>>(8.s*16) string 32STUB \b, 32rtm DOS extender ->>>(8.s*16) string !32STUB \b, for MS Windows ->>>(0x3c.l+0xf8) string UPX0 \b, UPX compressed ->>>(0x3c.l+0xf8) search/0x140 PEC2 \b, PECompact2 compressed ->>>(0x3c.l+0xf8) search/0x140 UPX2 ->>>>(&0x10.l+(-4)) string PK\3\4 \b, ZIP self-extracting archive (Info-Zip) ->>>(0x3c.l+0xf8) search/0x140 .idata ->>>>(&0xe.l+(-4)) string PK\3\4 \b, ZIP self-extracting archive (Info-Zip) ->>>>(&0xe.l+(-4)) string ZZ0 \b, ZZip self-extracting archive ->>>>(&0xe.l+(-4)) string ZZ1 \b, ZZip self-extracting archive ->>>(0x3c.l+0xf8) search/0x140 .rsrc ->>>>(&0x0f.l+(-4)) string a\\\4\5 \b, WinHKI self-extracting archive ->>>>(&0x0f.l+(-4)) string Rar! \b, RAR self-extracting archive ->>>>(&0x0f.l+(-4)) search/0x3000 MSCF \b, InstallShield self-extracting archive ->>>>(&0x0f.l+(-4)) search/32 Nullsoft \b, Nullsoft Installer self-extracting archive ->>>(0x3c.l+0xf8) search/0x140 .data ->>>>(&0x0f.l) string WEXTRACT \b, MS CAB-Installer self-extracting archive ->>>(0x3c.l+0xf8) search/0x140 .petite\0 \b, Petite compressed ->>>>(0x3c.l+0xf7) byte x ->>>>>(&0x104.l+(-4)) string =!sfx! 
\b, ACE self-extracting archive ->>>(0x3c.l+0xf8) search/0x140 .WISE \b, WISE installer self-extracting archive ->>>(0x3c.l+0xf8) search/0x140 .dz\0\0\0 \b, Dzip self-extracting archive ->>>&(0x3c.l+0xf8) search/0x100 _winzip_ \b, ZIP self-extracting archive (WinZip) ->>>&(0x3c.l+0xf8) search/0x100 SharedD \b, Microsoft Installer self-extracting archive ->>>0x30 string Inno \b, InnoSetup self-extracting archive - -# Hmm, not a PE but the relocation table is too high for a traditional DOS exe, -# must be one of the unusual subformats. ->>(0x3c.l) string !PE\0\0 MS-DOS executable - ->>(0x3c.l) string NE \b, NE ->>>(0x3c.l+0x36) byte 1 for OS/2 1.x ->>>(0x3c.l+0x36) byte 2 for MS Windows 3.x ->>>(0x3c.l+0x36) byte 3 for MS-DOS ->>>(0x3c.l+0x36) byte 4 for Windows 386 ->>>(0x3c.l+0x36) byte 5 for Borland Operating System Services ->>>(0x3c.l+0x36) default x ->>>>(0x3c.l+0x36) byte x (unknown OS %x) ->>>(0x3c.l+0x36) byte 0x81 for MS-DOS, Phar Lap DOS extender ->>>(0x3c.l+0x0c) leshort&0x8003 0x8002 (DLL) ->>>(0x3c.l+0x0c) leshort&0x8003 0x8001 (driver) ->>>&(&0x24.s-1) string ARJSFX \b, ARJ self-extracting archive ->>>(0x3c.l+0x70) search/0x80 WinZip(R)\ Self-Extractor \b, ZIP self-extracting archive (WinZip) - ->>(0x3c.l) string LX\0\0 \b, LX ->>>(0x3c.l+0x0a) leshort <1 (unknown OS) ->>>(0x3c.l+0x0a) leshort 1 for OS/2 ->>>(0x3c.l+0x0a) leshort 2 for MS Windows ->>>(0x3c.l+0x0a) leshort 3 for DOS ->>>(0x3c.l+0x0a) leshort >3 (unknown OS) ->>>(0x3c.l+0x10) lelong&0x28000 =0x8000 (DLL) ->>>(0x3c.l+0x10) lelong&0x20000 >0 (device driver) ->>>(0x3c.l+0x10) lelong&0x300 0x300 (GUI) ->>>(0x3c.l+0x10) lelong&0x28300 <0x300 (console) ->>>(0x3c.l+0x08) leshort 1 i80286 ->>>(0x3c.l+0x08) leshort 2 i80386 ->>>(0x3c.l+0x08) leshort 3 i80486 ->>>(8.s*16) string emx \b, emx ->>>>&1 string x %s ->>>&(&0x54.l-3) string arjsfx \b, ARJ self-extracting archive - -# MS Windows system file, supposedly a collection of LE executables ->>(0x3c.l) string W3 \b, W3 for MS Windows - ->>(0x3c.l) string LE\0\0 \b, LE executable ->>>(0x3c.l+0x0a) leshort 1 -# some DOS extenders use LE files with OS/2 header ->>>>0x240 search/0x100 DOS/4G for MS-DOS, DOS4GW DOS extender ->>>>0x240 search/0x200 WATCOM\ C/C++ for MS-DOS, DOS4GW DOS extender ->>>>0x440 search/0x100 CauseWay\ DOS\ Extender for MS-DOS, CauseWay DOS extender ->>>>0x40 search/0x40 PMODE/W for MS-DOS, PMODE/W DOS extender ->>>>0x40 search/0x40 STUB/32A for MS-DOS, DOS/32A DOS extender (stub) ->>>>0x40 search/0x80 STUB/32C for MS-DOS, DOS/32A DOS extender (configurable stub) ->>>>0x40 search/0x80 DOS/32A for MS-DOS, DOS/32A DOS extender (embedded) -# this is a wild guess; hopefully it is a specific signature ->>>>&0x24 lelong <0x50 ->>>>>(&0x4c.l) string \xfc\xb8WATCOM ->>>>>>&0 search/8 3\xdbf\xb9 \b, 32Lite compressed -# another wild guess: if real OS/2 LE executables exist, they probably have higher start EIP -#>>>>(0x3c.l+0x1c) lelong >0x10000 for OS/2 -# fails with DOS-Extenders. ->>>(0x3c.l+0x0a) leshort 2 for MS Windows ->>>(0x3c.l+0x0a) leshort 3 for DOS ->>>(0x3c.l+0x0a) leshort 4 for MS Windows (VxD) ->>>(&0x7c.l+0x26) string UPX \b, UPX compressed ->>>&(&0x54.l-3) string UNACE \b, ACE self-extracting archive - -# looks like ASCII, probably some embedded copyright message. 
-# and definitely not NE/LE/LX/PE ->>0x3c lelong >0x20000000 ->>>(4.s*512) leshort !0x014c \b, MZ for MS-DOS -# header data too small for extended executable ->2 long !0 ->>0x18 leshort <0x40 ->>>(4.s*512) leshort !0x014c - ->>>>&(2.s-514) string !LE ->>>>>&-2 string !BW \b, MZ for MS-DOS ->>>>&(2.s-514) string LE \b, LE ->>>>>0x240 search/0x100 DOS/4G for MS-DOS, DOS4GW DOS extender -# educated guess since indirection is still not capable enough for complex offset -# calculations (next embedded executable would be at &(&2*512+&0-2) -# I suspect there are only LE executables in these multi-exe files ->>>>&(2.s-514) string BW ->>>>>0x240 search/0x100 DOS/4G ,\b LE for MS-DOS, DOS4GW DOS extender (embedded) ->>>>>0x240 search/0x100 !DOS/4G ,\b BW collection for MS-DOS - -# This sequence skips to the first COFF segment, usually .text ->(4.s*512) leshort 0x014c \b, COFF ->>(8.s*16) string go32stub for MS-DOS, DJGPP go32 DOS extender ->>(8.s*16) string emx ->>>&1 string x for DOS, Win or OS/2, emx %s ->>&(&0x42.l-3) byte x ->>>&0x26 string UPX \b, UPX compressed -# and yet another guess: small .text, and after large .data is unusal, could be 32lite ->>&0x2c search/0xa0 .text ->>>&0x0b lelong <0x2000 ->>>>&0 lelong >0x6000 \b, 32lite compressed - ->(8.s*16) string $WdX \b, WDos/X DOS extender - -# By now an executable type should have been printed out. The executable -# may be a self-uncompressing archive, so look for evidence of that and -# print it out. -# -# Some signatures below from Greg Roelofs, newt@uchicago.edu. -# ->0x35 string \x8e\xc0\xb9\x08\x00\xf3\xa5\x4a\x75\xeb\x8e\xc3\x8e\xd8\x33\xff\xbe\x30\x00\x05 \b, aPack compressed ->0xe7 string LH/2\ Self-Extract \b, %s ->0x1c string UC2X \b, UCEXE compressed ->0x1c string WWP\ \b, WWPACK compressed ->0x1c string RJSX \b, ARJ self-extracting archive ->0x1c string diet \b, diet compressed ->0x1c string LZ09 \b, LZEXE v0.90 compressed ->0x1c string LZ91 \b, LZEXE v0.91 compressed ->0x1c string tz \b, TinyProg compressed ->0x1e string Copyright\ 1989-1990\ PKWARE\ Inc. Self-extracting PKZIP archive -!:mime application/zip -# Yes, this really is "Copr", not "Corp." ->0x1e string PKLITE\ Copr. Self-extracting PKZIP archive -!:mime application/zip -# winarj stores a message in the stub instead of the sig in the MZ header ->0x20 search/0xe0 aRJsfX \b, ARJ self-extracting archive ->0x20 string AIN ->>0x23 string 2 \b, AIN 2.x compressed ->>0x23 string <2 \b, AIN 1.x compressed ->>0x23 string >2 \b, AIN 1.x compressed ->0x24 string LHa's\ SFX \b, LHa self-extracting archive -!:mime application/x-lha ->0x24 string LHA's\ SFX \b, LHa self-extracting archive -!:mime application/x-lha ->0x24 string \ $ARX \b, ARX self-extracting archive ->0x24 string \ $LHarc \b, LHarc self-extracting archive ->0x20 string SFX\ by\ LARC \b, LARC self-extracting archive ->0x40 string aPKG \b, aPackage self-extracting archive ->0x64 string W\ Collis\0\0 \b, Compack compressed ->0x7a string Windows\ self-extracting\ ZIP \b, ZIP self-extracting archive ->>&0xf4 search/0x140 \x0\x40\x1\x0 ->>>(&0.l+(4)) string MSCF \b, WinHKI CAB self-extracting archive ->1638 string -lh5- \b, LHa self-extracting archive v2.13S ->0x17888 string Rar! \b, RAR self-extracting archive - -# Skip to the end of the EXE. This will usually work fine in the PE case -# because the MZ image is hardcoded into the toolchain and almost certainly -# won't match any of these signatures. ->(4.s*512) long x ->>&(2.s-517) byte x ->>>&0 string PK\3\4 \b, ZIP self-extracting archive ->>>&0 string Rar! 
\b, RAR self-extracting archive ->>>&0 string =!\x11 \b, AIN 2.x self-extracting archive ->>>&0 string =!\x12 \b, AIN 2.x self-extracting archive ->>>&0 string =!\x17 \b, AIN 1.x self-extracting archive ->>>&0 string =!\x18 \b, AIN 1.x self-extracting archive ->>>&7 search/400 **ACE** \b, ACE self-extracting archive ->>>&0 search/0x480 UC2SFX\ Header \b, UC2 self-extracting archive - -# a few unknown ZIP sfxes, no idea if they are needed or if they are -# already captured by the generic patterns above ->(8.s*16) search/0x20 PKSFX \b, ZIP self-extracting archive (PKZIP) -# TODO: how to add this? >FileSize-34 string Windows\ Self-Installing\ Executable \b, ZIP self-extracting archive -# - -# TELVOX Teleinformatica CODEC self-extractor for OS/2: ->49801 string \x79\xff\x80\xff\x76\xff \b, CODEC archive v3.21 ->>49824 leshort =1 \b, 1 file ->>49824 leshort >1 \b, %u files - -# Popular applications -2080 string Microsoft\ Word\ 6.0\ Document %s -!:mime application/msword -2080 string Documento\ Microsoft\ Word\ 6 Spanish Microsoft Word 6 document data -!:mime application/msword -# Pawel Wiecek (for polish Word) -2112 string MSWordDoc Microsoft Word document data -!:mime application/msword -# -0 belong 0x31be0000 Microsoft Word Document -!:mime application/msword -# -0 string/b PO^Q` Microsoft Word 6.0 Document -!:mime application/msword -# -0 string/b \376\067\0\043 Microsoft Office Document -!:mime application/msword -0 string/b \333\245-\0\0\0 Microsoft Office Document -!:mime application/msword -512 string/b \354\245\301 Microsoft Word Document -!:mime application/msword - -# -0 string/b \xDB\xA5\x2D\x00 Microsoft WinWord 2.0 Document -!:mime application/msword -# -2080 string Microsoft\ Excel\ 5.0\ Worksheet %s -!:mime application/vnd.ms-excel -# -0 string/b \xDB\xA5\x2D\x00 Microsoft WinWord 2.0 Document -!:mime application/msword - -2080 string Foglio\ di\ lavoro\ Microsoft\ Exce %s -!:mime application/vnd.ms-excel -# -# Pawel Wiecek (for polish Excel) -2114 string Biff5 Microsoft Excel 5.0 Worksheet -!:mime application/vnd.ms-excel -# Italian MS-Excel -2121 string Biff5 Microsoft Excel 5.0 Worksheet -!:mime application/vnd.ms-excel -0 string/b \x09\x04\x06\x00\x00\x00\x10\x00 Microsoft Excel Worksheet -!:mime application/vnd.ms-excel -# -0 belong 0x00001a00 Lotus 1-2-3 -!:mime application/x-123 -# -0 belong 0x00000200 Lotus 1-2-3 -!:mime application/x-123 -0 string/b WordPro\0 Lotus WordPro -!:mime application/vnd.lotus-wordpro -0 string/b WordPro\r\373 Lotus WordPro -!:mime application/vnd.lotus-wordpro - -# Windows icons (Ian Springer ) -0 string/b \000\000\001\000 MS Windows icon resource -!:mime image/x-icon - -# .PIF files added by Joerg Jenderek from http://smsoft.ru/en/pifdoc.htm -# only for windows versions equal or greater 3.0 -0x171 string MICROSOFT\ PIFEX\0 Windows Program Information File -!:mime application/x-dosexec - -# TNEF magic From "Joomy" -# Microsoft Outlook's Transport Neutral Encapsulation Format (TNEF) -0 leshort 0x223e9f78 TNEF -!:mime application/vnd.ms-tnef - -#------------------------------------------------------------------------------ -# From Stuart Caie (developer of cabextract) -# Microsoft Cabinet files -0 string/b MSCF\0\0\0\0 Microsoft Cabinet archive data -!:mime application/vnd.ms-cab-compressed - -# from http://filext.com by Derek M Jones -# False positive with PPT (also currently this string is too long) -#0 string/b \xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x3E\x00\x03\x00\xFE\xFF\x09\x00\x06 
Microsoft Installer -0 string/b \320\317\021\340\241\261\032\341 Microsoft Office Document -!:mime application/msword -#>48 byte 0x1B Excel Document -#!:mime application/vnd.ms-excel -#>546 string bjbj Microsoft Word Document -#!:mime application/msword -#>546 string jbjb Microsoft Word Document -#!:mime application/msword - -0 string/b \224\246\056 Microsoft Word Document -!:mime application/msword - -512 string R\0o\0o\0t\0\ \0E\0n\0t\0r\0y Microsoft Word Document -!:mime application/msword - -# MS eBook format (.lit) -0 string/b ITOLITLS Microsoft Reader eBook Data ->8 lelong x \b, version %u -!:mime application/x-ms-reader diff --git a/magic/neko b/magic/neko deleted file mode 100644 index 50163a0861..0000000000 --- a/magic/neko +++ /dev/null @@ -1,12 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------ -# $File: java,v 1.12 2009/09/19 16:28:10 christos Exp $ - -# From: Mikhail Gusarov -# NekoVM (http://nekovm.org/) bytecode -0 string NEKO NekoVM bytecode ->4 lelong x (%d global symbols, ->8 lelong x %d global fields, ->12 lelong x %d bytecode ops) -!:mime application/x-nekovm-bytecode - diff --git a/magic/pascal b/magic/pascal deleted file mode 100644 index 911eea3c0c..0000000000 --- a/magic/pascal +++ /dev/null @@ -1,11 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File$ -# pascal: file(1) magic for Pascal source -# -0 search/8192 (input, Pascal source text -!:mime text/x-pascal -0 regex \^program Pascal source text -!:mime text/x-pascal -0 regex \^record Pascal source text -!:mime text/x-pascal diff --git a/magic/pdf b/magic/pdf deleted file mode 100644 index 761006ffe6..0000000000 --- a/magic/pdf +++ /dev/null @@ -1,8 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File$ -# pdf: file(1) magic for Portable Document Format -# - -0 string %PDF- PDF document -!:mime application/pdf diff --git a/magic/perl b/magic/perl deleted file mode 100644 index 12ec33b73a..0000000000 --- a/magic/perl +++ /dev/null @@ -1,26 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File: perl,v 1.19 2012/06/20 21:16:25 christos Exp $ -# perl: file(1) magic for Larry Wall's perl language. -# -# The `eval' lines recognizes an outrageously clever hack. 
-# Keith Waclena -# Send additions to -0 search/1/w #!\ /bin/perl Perl script text executable -!:mime text/x-perl -0 search/1 eval\ "exec\ /bin/perl Perl script text -!:mime text/x-perl -0 search/1/w #!\ /usr/bin/perl Perl script text executable -!:mime text/x-perl -0 search/1 eval\ "exec\ /usr/bin/perl Perl script text -!:mime text/x-perl -0 search/1/w #!\ /usr/local/bin/perl Perl script text executable -!:mime text/x-perl -0 search/1 eval\ "exec\ /usr/local/bin/perl Perl script text -!:mime text/x-perl -0 search/1 eval\ '(exit\ $?0)'\ &&\ eval\ 'exec Perl script text -!:mime text/x-perl -0 search/1 #!/usr/bin/env\ perl Perl script text executable -!:mime text/x-perl -0 search/1 #!\ /usr/bin/env\ perl Perl script text executable -!:mime text/x-perl diff --git a/magic/pgp b/magic/pgp deleted file mode 100644 index 2bdfb77981..0000000000 --- a/magic/pgp +++ /dev/null @@ -1,27 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File$ -# pgp: file(1) magic for Pretty Good Privacy -# see http://lists.gnupg.org/pipermail/gnupg-devel/1999-September/016052.html -# -0 beshort 0x9900 PGP key public ring -!:mime application/x-pgp-keyring -0 beshort 0x9501 PGP key security ring -!:mime application/x-pgp-keyring -0 beshort 0x9500 PGP key security ring -!:mime application/x-pgp-keyring -0 beshort 0xa600 PGP encrypted data -#!:mime application/pgp-encrypted -#0 string -----BEGIN\040PGP text/PGP armored data -!:mime text/PGP # encoding: armored data -#>15 string PUBLIC\040KEY\040BLOCK- public key block -#>15 string MESSAGE- message -#>15 string SIGNED\040MESSAGE- signed message -#>15 string PGP\040SIGNATURE- signature - -2 string ---BEGIN\ PGP\ PUBLIC\ KEY\ BLOCK- PGP public key block -!:mime application/pgp-keys -0 string -----BEGIN\040PGP\40MESSAGE- PGP message -!:mime application/pgp -0 string -----BEGIN\040PGP\40SIGNATURE- PGP signature -!:mime application/pgp-signature diff --git a/magic/pkgadd b/magic/pkgadd deleted file mode 100644 index 602b4ec21d..0000000000 --- a/magic/pkgadd +++ /dev/null @@ -1,7 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File$ -# pkgadd: file(1) magic for SysV R4 PKG Datastreams -# -0 string #\ PaCkAgE\ DaTaStReAm pkg Datastream (SVR4) -!:mime application/x-svr4-package diff --git a/magic/printer b/magic/printer deleted file mode 100644 index cdce275b12..0000000000 --- a/magic/printer +++ /dev/null @@ -1,14 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File: printer,v 1.24 2011/05/08 16:34:51 christos Exp $ -# printer: file(1) magic for printer-formatted files -# - -# PostScript, updated by Daniel Quinlan (quinlan@yggdrasil.com) -0 string %! PostScript document text -!:mime application/postscript -!:apple ASPSTEXT -# Some PCs have the annoying habit of adding a ^D as a document separator -0 string \004%! PostScript document text -!:mime application/postscript -!:apple ASPSTEXT diff --git a/magic/python b/magic/python deleted file mode 100644 index 1cd724bc59..0000000000 --- a/magic/python +++ /dev/null @@ -1,46 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. 
-#------------------------------------------------------------------------------ -# $File: python,v 1.21 2012/06/21 01:12:51 christos Exp $ -# python: file(1) magic for python -# - -0 search/1/w #!\ /usr/bin/python Python script text executable -!:mime text/x-python -0 search/1/w #!\ /usr/local/bin/python Python script text executable -!:mime text/x-python -0 search/1 #!/usr/bin/env\ python Python script text executable -!:mime text/x-python -0 search/1 #!\ /usr/bin/env\ python Python script text executable -!:mime text/x-python - -# from module.submodule import func1, func2 -0 regex \^from\\s+(\\w|\\.)+\\s+import.*$ Python script text executable -!:mime text/x-python - -# def __init__ (self, ...): -0 search/4096 def\ __init__ ->&0 search/64 self Python script text executable -!:mime text/x-python - -# comments -0 search/4096 ''' ->&0 regex .*'''$ Python script text executable -!:mime text/x-python - -0 search/4096 """ ->&0 regex .*"""$ Python script text executable -!:mime text/x-python - -# try: -# except: or finally: -# block -0 search/4096 try: ->&0 regex \^\\s*except.*: Python script text executable -!:mime text/x-python ->&0 search/4096 finally: Python script text executable -!:mime text/x-python - -# def name(args, args): -0 regex \^(\ |\\t)*def\ +[a-zA-Z]+ ->&0 regex \ *\\(([a-zA-Z]|,|\ )*\\):$ Python script text executable -!:mime text/x-python diff --git a/magic/riff b/magic/riff deleted file mode 100644 index 929dc9aa89..0000000000 --- a/magic/riff +++ /dev/null @@ -1,36 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File: riff,v 1.22 2011/09/06 11:00:06 christos Exp $ -# riff: file(1) magic for RIFF format -# See -# -# http://www.seanet.com/users/matts/riffmci/riffmci.htm -# -# AVI section extended by Patrik Radman -# -0 string RIFF RIFF (little-endian) data -# Microsoft WAVE format (*.wav) ->8 string WAVE \b, WAVE audio -!:mime audio/x-wav -# Corel Draw Picture ->8 string CDRA \b, Corel Draw Picture -!:mime image/x-coreldraw -# AVI == Audio Video Interleave ->8 string AVI\040 \b, AVI -!:mime video/x-msvideo - -#------------------------------------------------------------------------------ -# Sony Wave64 -# see http://www.vcs.de/fileadmin/user_upload/MBS/PDF/Whitepaper/Informations_about_Sony_Wave64.pdf -# 128 bit RIFF-GUID { 66666972-912E-11CF-A5D6-28DB04C10000 } in little-endian -0 string riff\x2E\x91\xCF\x11\xA5\xD6\x28\xDB\x04\xC1\x00\x00 Sony Wave64 RIFF data -# 128 bit + total file size (64 bits) so 24 bytes -# then WAVE-GUID { 65766177-ACF3-11D3-8CD1-00C04F8EDB8A } ->24 string wave\xF3\xAC\xD3\x11\x8C\xD1\x00\xC0\x4F\x8E\xDB\x8A \b, WAVE 64 audio -!:mime audio/x-w64 - -#------------------------------------------------------------------------------ -# MBWF/RF64 -# see EBU TECH 3306 http://tech.ebu.ch/docs/tech/tech3306-2009.pdf -0 string RF64\xff\xff\xff\xffWAVEds64 MBWF/RF64 audio -!:mime audio/x-wav diff --git a/magic/rpm b/magic/rpm deleted file mode 100644 index 2558ebeef1..0000000000 --- a/magic/rpm +++ /dev/null @@ -1,12 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. 
-#------------------------------------------------------------------------------ -# $File: rpm,v 1.11 2011/06/14 12:47:41 christos Exp $ -# -# RPM: file(1) magic for Red Hat Packages Erik Troan (ewt@redhat.com) -# -0 belong 0xedabeedb RPM -!:mime application/x-rpm - -#delta RPM Daniel Novotny (dnovotny@redhat.com) -0 string drpm Delta RPM -!:mime application/x-rpm diff --git a/magic/rtf b/magic/rtf deleted file mode 100644 index 0719264e47..0000000000 --- a/magic/rtf +++ /dev/null @@ -1,9 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File$ -# rtf: file(1) magic for Rich Text Format (RTF) -# -# Duncan P. Simpson, D.P.Simpson@dcs.warwick.ac.uk -# -0 string {\\rtf Rich Text Format data, -!:mime text/rtf diff --git a/magic/ruby b/magic/ruby deleted file mode 100644 index 41682a89ad..0000000000 --- a/magic/ruby +++ /dev/null @@ -1,28 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File: ruby,v 1.4 2010/07/08 20:24:13 christos Exp $ -# ruby: file(1) magic for Ruby scripting language -# URL: http://www.ruby-lang.org/ -# From: Reuben Thomas - -# Ruby scripts -0 search/1/w #!\ /usr/bin/ruby Ruby script text executable -!:mime text/x-ruby -0 search/1/w #!\ /usr/local/bin/ruby Ruby script text executable -!:mime text/x-ruby -0 search/1 #!/usr/bin/env\ ruby Ruby script text executable -!:mime text/x-ruby -0 search/1 #!\ /usr/bin/env\ ruby Ruby script text executable -!:mime text/x-ruby - -# What looks like ruby, but does not have a shebang -# (modules and such) -# From: Lubomir Rintel -0 regex \^[\ \t]*require[\ \t]'[A-Za-z_/]+' ->0 regex include\ [A-Z]|def\ [a-z]|\ do$ ->>0 regex \^[\ \t]*end([\ \t]*[;#].*)?$ Ruby script text -!:mime text/x-ruby -0 regex \^[\ \t]*(class|module)[\ \t][A-Z] ->0 regex (modul|includ)e\ [A-Z]|def\ [a-z] ->>0 regex \^[\ \t]*end([\ \t]*[;#].*)?$ Ruby module source text -!:mime text/x-ruby diff --git a/magic/sc b/magic/sc deleted file mode 100644 index 75333b3916..0000000000 --- a/magic/sc +++ /dev/null @@ -1,7 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File$ -# sc: file(1) magic for "sc" spreadsheet -# -38 string Spreadsheet sc spreadsheet file -!:mime application/x-sc diff --git a/magic/sgml b/magic/sgml deleted file mode 100644 index 64efa2c153..0000000000 --- a/magic/sgml +++ /dev/null @@ -1,82 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File: sgml,v 1.28 2012/04/28 21:20:26 christos Exp $ -# Type: SVG Vectorial Graphics -# From: Noel Torres -0 string \15 string >\0 ->>19 search/4096 \>19 search/4096 \15 string >\0 ->>19 search/4096/cWbt \15 string >\0 ->>19 search/4096/cWbt \15 string >\0 ->>19 search/4096/cWbt \ - -# Although we may know the offset of certain text fields in TeX DVI -# and font files, we can't use them reliably because they are not -# zero terminated. [but we do anyway, christos] -0 string \367\002 TeX DVI file -!:mime application/x-dvi - -# There is no way to detect TeX Font Metric (*.tfm) files without -# breaking them apart and reading the data. The following patterns -# match most *.tfm files generated by METAFONT or afm2tfm. 
-2 string \000\021 TeX font metric data -!:mime application/x-tex-tfm -2 string \000\022 TeX font metric data -!:mime application/x-tex-tfm - -# Texinfo and GNU Info, from Daniel Quinlan (quinlan@yggdrasil.com) -0 search/1 \\input\ texinfo Texinfo source text -!:mime text/x-texinfo -0 search/1 This\ is\ Info\ file GNU Info text -!:mime text/x-info - -# TeX documents, from Daniel Quinlan (quinlan@yggdrasil.com) -0 search/4096 \\input TeX document text -!:mime text/x-tex -!:strength + 15 -0 search/4096 \\section LaTeX document text -!:mime text/x-tex -!:strength + 18 -0 search/4096 \\setlength LaTeX document text -!:mime text/x-tex -!:strength + 15 -0 search/4096 \\documentstyle LaTeX document text -!:mime text/x-tex -!:strength + 18 -0 search/4096 \\chapter LaTeX document text -!:mime text/x-tex -!:strength + 18 -0 search/4096 \\documentclass LaTeX 2e document text -!:mime text/x-tex -!:strength + 15 -0 search/4096 \\relax LaTeX auxiliary file -!:mime text/x-tex -!:strength + 15 -0 search/4096 \\contentsline LaTeX table of contents -!:mime text/x-tex -!:strength + 15 -0 search/4096 %\ -*-latex-*- LaTeX document text -!:mime text/x-tex diff --git a/magic/troff b/magic/troff deleted file mode 100644 index 7f60b1d9b3..0000000000 --- a/magic/troff +++ /dev/null @@ -1,22 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File$ -# troff: file(1) magic for *roff -# -# updated by Daniel Quinlan (quinlan@yggdrasil.com) - -# troff input -0 search/1 .\\" troff or preprocessor input text -!:mime text/troff -0 search/1 '\\" troff or preprocessor input text -!:mime text/troff -0 search/1 '.\\" troff or preprocessor input text -!:mime text/troff -0 search/1 \\" troff or preprocessor input text -!:mime text/troff -0 search/1 ''' troff or preprocessor input text -!:mime text/troff -0 regex/20 \^\\.[A-Za-z0-9][A-Za-z0-9][\ \t] troff or preprocessor input text -!:mime text/troff -0 regex/20 \^\\.[A-Za-z0-9][A-Za-z0-9]$ troff or preprocessor input text -!:mime text/troff diff --git a/magic/vorbis b/magic/vorbis deleted file mode 100644 index 4d25c3c3cd..0000000000 --- a/magic/vorbis +++ /dev/null @@ -1,26 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File$ -# vorbis: file(1) magic for Ogg/Vorbis files -# -# From Felix von Leitner -# Extended by Beni Cherniavsky -# Further extended by Greg Wooledge -# -# Most (everything but the number of channels and bitrate) is commented -# out with `##' as it's not interesting to the average user. The most -# probable things advanced users would want to uncomment are probably -# the number of comments and the encoder version. -# -# FIXME: The first match has been made a search, so that it can skip -# over prepended ID3 tags. This will work for MIME type detection, but -# won't work for detecting other properties of the file (they all need -# to be made relative to the search). In any case, if the file has ID3 -# tags, the ID3 information will be printed, not the Ogg information, -# so until that's fixed, this doesn't matter. -# FIXME[2]: Disable the above for now, since search assumes text mode. 
-# -# --- Ogg Framing --- -#0 search/1000 OggS Ogg data -0 string OggS Ogg data -!:mime application/ogg diff --git a/magic/warc b/magic/warc deleted file mode 100644 index 2a2aeb6fae..0000000000 --- a/magic/warc +++ /dev/null @@ -1,14 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File: warc,v 1.2 2009/09/19 16:28:13 christos Exp $ -# warc: file(1) magic for WARC files - -0 string WARC/ WARC Archive ->5 string x version %.4s -!:mime application/warc - -#------------------------------------------------------------------------------ -# Arc File Format from Internet Archive -# see http://www.archive.org/web/researcher/ArcFileFormat.php -0 string filedesc:// Internet Archive File -!:mime application/x-ia-arc diff --git a/magic/windows b/magic/windows deleted file mode 100644 index 6a529782a9..0000000000 --- a/magic/windows +++ /dev/null @@ -1,19 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File: windows,v 1.4 2009/09/19 16:28:13 christos Exp $ -# windows: file(1) magic for Microsoft Windows -# -# This file is mainly reserved for files where programs -# using them are run almost always on MS Windows 3.x or -# above, or files only used exclusively in Windows OS, -# where there is no better category to allocate for. -# For example, even though WinZIP almost run on Windows -# only, it is better to treat them as "archive" instead. -# For format usable in DOS, such as generic executable -# format, please specify under "msdos" file. -# - -# From: Pal Tamas -# Autorun File -0 string/c [autorun]\r\n Microsoft Windows Autorun file. -!:mime application/x-setupscript. diff --git a/magic/wordprocessors b/magic/wordprocessors deleted file mode 100644 index 7de3413c0a..0000000000 --- a/magic/wordprocessors +++ /dev/null @@ -1,43 +0,0 @@ -# See COPYING file in this directory for original libmagic copyright. -#------------------------------------------------------------------------------ -# $File: wordprocessors,v 1.16 2012/10/29 17:36:49 christos Exp $ -# wordprocessors: file(1) magic fo word processors. -# - -# Hangul (Korean) Word Processor File -# From: Won-Kyu Park -512 string R\0o\0o\0t\0 Hangul (Korean) Word Processor File 2000 -!:mime application/x-hwp - -# Quark Express from http://www.garykessler.net/library/file_sigs.html -2 string MMXPR3 Motorola Quark Express Document (English) -!:mime application/x-quark-xpress-3 - -#------------------------------------------------------------------------------ -# ichitaro456: file(1) magic for Just System Word Processor Ichitaro -# -# Contributor kenzo-: -# Reversed-engineered JS Ichitaro magic numbers -# - -0 string DOC ->43 byte 0x14 Just System Word Processor Ichitaro v4 -!:mime application/x-ichitaro4 - -0 string DOC ->43 byte 0x15 Just System Word Processor Ichitaro v5 -!:mime application/x-ichitaro5 - -0 string DOC ->43 byte 0x16 Just System Word Processor Ichitaro v6 -!:mime application/x-ichitaro6 - -# Type: Freemind mindmap documents -# From: Jamie Thompson -0 string/w \ -0 string \ Date: Wed, 10 Jul 2013 16:29:07 -0400 Subject: [PATCH 059/118] Added support for files to the notice framework. 
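
For illustration, a minimal sketch of how a script can hand a file record to the notice framework after this change. The module name, notice type, and trigger condition below are invented for the example; what the patch itself provides is that passing $f lets the policy-application step fill in the fuid, mime_type and, when the file maps to exactly one connection, conn fields automatically before the fa_file reference is dropped.

  @load base/frameworks/notice

  module ExampleFileNotice;

  export {
      ## Hypothetical notice type, defined only for this sketch.
      redef enum Notice::Type += { Interesting_File };
  }

  event file_new(f: fa_file)
      {
      # Raise a notice carrying the fa_file record itself; the framework
      # derives $fuid, $mime_type and (if unambiguous) $conn from it while
      # applying the notice policy.
      if ( f?$mime_type && f$mime_type == "application/x-dosexec" )
          NOTICE([$note=Interesting_File,
                  $msg="illustrative notice tied to a file",
                  $f=f]);
      }
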
--- scripts/base/frameworks/notice/main.bro | 34 ++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/scripts/base/frameworks/notice/main.bro b/scripts/base/frameworks/notice/main.bro index 30e0013517..f47ed79940 100644 --- a/scripts/base/frameworks/notice/main.bro +++ b/scripts/base/frameworks/notice/main.bro @@ -68,6 +68,19 @@ export { ## the notice policy. iconn: icmp_conn &optional; + ## A file record if the notice is relted to a file. The + ## reference to the actual fa_file record will be deleted after applying + ## the notice policy. + f: fa_file &optional; + + ## A file unique ID if this notice is related to a file. If the $f + ## field is provided, this will be automatically filled out. + fuid: string &log &optional; + + ## A mime type if the notice is related to a file. If the $f field + ## is provided, this will be automatically filled out. + mime_type: string &log &optional; + ## The transport protocol. Filled automatically when either conn, iconn ## or p is specified. proto: transport_proto &log &optional; @@ -460,6 +473,19 @@ function apply_policy(n: Notice::Info) if ( ! n?$ts ) n$ts = network_time(); + if ( n?$f ) + { + if ( ! n?$fuid ) + n$fuid = n$f$id; + if ( ! n?$mime_type && n$f?$mime_type ) + n$mime_type = n$f$mime_type; + if ( |n$f$conns| == 1 ) + { + for ( id in n$f$conns ) + n$conn = n$f$conns[id]; + } + } + if ( n?$conn ) { if ( ! n?$id ) @@ -513,13 +539,15 @@ function apply_policy(n: Notice::Info) if ( ! n?$suppress_for ) n$suppress_for = default_suppression_interval; - # Delete the connection record if it's there so we aren't sending that - # to remote machines. It can cause problems due to the size of the - # connection record. + # Delete the connection and file records if they're there so we + # aren't sending that to remote machines. It can cause problems + # due to the size of those records. if ( n?$conn ) delete n$conn; if ( n?$iconn ) delete n$iconn; + if ( n?$f ) + delete n$f; } function internal_NOTICE(n: Notice::Info) From 22b4f8dd90f1b5b1262240efb94a3f65546ace04 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Wed, 10 Jul 2013 16:51:22 -0400 Subject: [PATCH 060/118] Fix a small issue with finding smtp entities. --- scripts/base/protocols/smtp/entities.bro | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/scripts/base/protocols/smtp/entities.bro b/scripts/base/protocols/smtp/entities.bro index ec43b39ce1..159c10b5ab 100644 --- a/scripts/base/protocols/smtp/entities.bro +++ b/scripts/base/protocols/smtp/entities.bro @@ -33,12 +33,12 @@ event mime_begin_entity(c: connection) &priority=10 event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priority=5 { - if ( f$source != "SMTP" ) - return; - - if ( c$smtp$entity?$filename ) - f$info$filename = c$smtp$entity$filename; - f$info$depth = c$smtp_state$mime_depth; + if ( f$source == "SMTP" && c?$smtp ) + { + if ( c$smtp?$entity && c$smtp$entity?$filename ) + f$info$filename = c$smtp$entity$filename; + f$info$depth = c$smtp_state$mime_depth; + } } event mime_one_header(c: connection, h: mime_header_rec) &priority=5 From bf4f57383f5f0639257ecac3651c7b01004a3a02 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Wed, 10 Jul 2013 16:52:39 -0400 Subject: [PATCH 061/118] Improve malware hash registry script. - Include a link to a virustotal search in the notice sub message field. - Give all information returned from Team Cymru in the notice message. - Add more file types to match on to the default set. 
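
A usage sketch for tuning this detection locally; the values are illustrative only, and it is assumed here that notice_threshold is declared &redef in the same way match_file_types is:

  @load frameworks/files/detect-MHR

  # Replace (not extend) the default set of MIME types sent for hash lookup.
  redef TeamCymruMalwareHashRegistry::match_file_types =
      /application\/x-dosexec/ |
      /application\/pdf/ |
      /application\/zip/;

  # Only raise a Match notice when at least half of the A/V engines reported
  # by Team Cymru flagged the sample (assumes the constant is &redef).
  redef TeamCymruMalwareHashRegistry::notice_threshold = 50;

Because redef with "=" overwrites the shipped pattern, a site that only wants to add one type needs to repeat the defaults it still cares about in the new pattern.
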
--- .../policy/frameworks/files/detect-MHR.bro | 34 +++++++------------ 1 file changed, 13 insertions(+), 21 deletions(-) diff --git a/scripts/policy/frameworks/files/detect-MHR.bro b/scripts/policy/frameworks/files/detect-MHR.bro index c896bd56fd..ebfc97fd26 100644 --- a/scripts/policy/frameworks/files/detect-MHR.bro +++ b/scripts/policy/frameworks/files/detect-MHR.bro @@ -5,7 +5,7 @@ @load base/frameworks/notice @load frameworks/files/hash-all-files -module MalwareHashRegistery; +module TeamCymruMalwareHashRegistry; export { redef enum Notice::Type += { @@ -14,16 +14,12 @@ export { Match }; - redef record Files::Info += { - ## Team Cymru Malware Hash Registry date of first detection. - mhr_first_detected: time &log &optional; - ## Team Cymru Malware Hash Registry percent of detection - ## among malware scanners. - mhr_detect_rate: count &log &optional; - }; - ## File types to attempt matching against the Malware Hash Registry. - const match_file_types = /^application\/x-dosexec/ &redef; + const match_file_types = /application\/x-dosexec/ | + /application\/pdf/ | + /application\/x-shockwave-flash/ | + /application\/x-java-applet/ | + /video\/mp4/ &redef; ## The malware hash registry runs each malware sample through several A/V engines. ## Team Cymru returns a percentage to indicate how many A/V engines flagged the @@ -43,19 +39,15 @@ event file_hash(f: fa_file, kind: string, hash: string) local MHR_answer = split1(MHR_result, / /); if ( |MHR_answer| == 2 ) { - f$info$mhr_first_detected = double_to_time(to_double(MHR_answer[1])); - f$info$mhr_detect_rate = to_count(MHR_answer[2]); + local mhr_first_detected = double_to_time(to_double(MHR_answer[1])); + local mhr_detect_rate = to_count(MHR_answer[2]); - #print strftime("%Y-%m-%d %H:%M:%S", f$info$mhr_first_detected); - if ( f$info$mhr_detect_rate >= notice_threshold ) + local readable_first_detected = strftime("%Y-%m-%d %H:%M:%S", mhr_first_detected); + if ( mhr_detect_rate >= notice_threshold ) { - local url = ""; - # TODO: Create a generic mechanism for creating file "urls". - #if ( f$source == "HTTP" ) - # url = HTTP::build_url_http(f); - local message = fmt("%s %s", hash, url); - #local message = fmt("Host(s) %s sent a file with SHA1 hash %s to host %s", f$src_host, hash, f$dst_host); - NOTICE([$note=Match, $msg=message]); + local message = fmt("Detection rate: %d%% Last seen: %s", mhr_detect_rate, readable_first_detected); + local virustotal_url = fmt("https://www.virustotal.com/en/file/%s/analysis/", hash); + NOTICE([$note=Match, $msg=message, $sub=virustotal_url, $f=f]); } } } From be8c947c040ac828036a70938bcc3f721a5a480d Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Wed, 10 Jul 2013 17:04:09 -0400 Subject: [PATCH 062/118] Adding CAB files for MHR checking. --- scripts/policy/frameworks/files/detect-MHR.bro | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/policy/frameworks/files/detect-MHR.bro b/scripts/policy/frameworks/files/detect-MHR.bro index ebfc97fd26..18875ade4c 100644 --- a/scripts/policy/frameworks/files/detect-MHR.bro +++ b/scripts/policy/frameworks/files/detect-MHR.bro @@ -16,6 +16,7 @@ export { ## File types to attempt matching against the Malware Hash Registry. 
const match_file_types = /application\/x-dosexec/ | + /application\/vnd.ms-cab-compressed/ | /application\/pdf/ | /application\/x-shockwave-flash/ | /application\/x-java-applet/ | From 06287966a166d9a2d33a84084898009bfe86eea3 Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Wed, 10 Jul 2013 14:19:00 -0700 Subject: [PATCH 063/118] Bringing the DPD POP3 signature back. This also avoids the need for updating the external test suite. --- scripts/base/init-default.bro | 1 + scripts/base/protocols/pop3/__load__.bro | 2 ++ scripts/base/protocols/pop3/dpd.sig | 13 +++++++++++++ .../canonified_loaded_scripts.log | 5 +++-- 4 files changed, 19 insertions(+), 2 deletions(-) create mode 100644 scripts/base/protocols/pop3/__load__.bro create mode 100644 scripts/base/protocols/pop3/dpd.sig diff --git a/scripts/base/init-default.bro b/scripts/base/init-default.bro index 6c40a7547f..6aa8ff5e26 100644 --- a/scripts/base/init-default.bro +++ b/scripts/base/init-default.bro @@ -41,6 +41,7 @@ @load base/protocols/http @load base/protocols/irc @load base/protocols/modbus +@load base/protocols/pop3 @load base/protocols/smtp @load base/protocols/socks @load base/protocols/ssh diff --git a/scripts/base/protocols/pop3/__load__.bro b/scripts/base/protocols/pop3/__load__.bro new file mode 100644 index 0000000000..c5ddf0e788 --- /dev/null +++ b/scripts/base/protocols/pop3/__load__.bro @@ -0,0 +1,2 @@ + +@load-sigs ./dpd.sig diff --git a/scripts/base/protocols/pop3/dpd.sig b/scripts/base/protocols/pop3/dpd.sig new file mode 100644 index 0000000000..8d7e3567da --- /dev/null +++ b/scripts/base/protocols/pop3/dpd.sig @@ -0,0 +1,13 @@ +signature dpd_pop3_server { + ip-proto == tcp + payload /^\+OK/ + requires-reverse-signature dpd_pop3_client + enable "pop3" + tcp-state responder +} + +signature dpd_pop3_client { + ip-proto == tcp + payload /(|.*[\r\n])[[:space:]]*([uU][sS][eE][rR][[:space:]]|[aA][pP][oO][pP][[:space:]]|[cC][aA][pP][aA]|[aA][uU][tT][hH])/ + tcp-state originator +} diff --git a/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log b/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log index 6d6906d924..999fd7c841 100644 --- a/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log +++ b/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log @@ -3,7 +3,7 @@ #empty_field (empty) #unset_field - #path loaded_scripts -#open 2013-07-10-03-19-58 +#open 2013-07-10-21-18-31 #fields name #types string scripts/base/init-bare.bro @@ -178,6 +178,7 @@ scripts/base/init-default.bro scripts/base/protocols/modbus/__load__.bro scripts/base/protocols/modbus/consts.bro scripts/base/protocols/modbus/main.bro + scripts/base/protocols/pop3/__load__.bro scripts/base/protocols/smtp/__load__.bro scripts/base/protocols/smtp/main.bro scripts/base/protocols/smtp/entities.bro @@ -194,4 +195,4 @@ scripts/base/init-default.bro scripts/base/protocols/tunnels/__load__.bro scripts/base/misc/find-checksum-offloading.bro scripts/policy/misc/loaded-scripts.bro -#close 2013-07-10-03-19-58 +#close 2013-07-10-21-18-31 From 3d5c17e9e01b812398d5cc928c63d883d2a89d55 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Wed, 10 Jul 2013 23:46:01 -0400 Subject: [PATCH 064/118] Add jar files to the default MHR lookups. 
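
A small illustrative check, not part of the patch, of whether a given file would fall into the default lookup set once this type is added; the event handler and print output are purely for demonstration:

  @load frameworks/files/detect-MHR

  event file_new(f: fa_file)
      {
      # Purely illustrative: report files whose MIME type falls into the set
      # that the MHR policy script considers for hash lookups.
      if ( f?$mime_type &&
           TeamCymruMalwareHashRegistry::match_file_types in f$mime_type )
          print fmt("file %s (%s) is eligible for an MHR lookup",
                    f$id, f$mime_type);
      }
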
--- scripts/policy/frameworks/files/detect-MHR.bro | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/policy/frameworks/files/detect-MHR.bro b/scripts/policy/frameworks/files/detect-MHR.bro index 18875ade4c..71d73217e0 100644 --- a/scripts/policy/frameworks/files/detect-MHR.bro +++ b/scripts/policy/frameworks/files/detect-MHR.bro @@ -20,6 +20,7 @@ export { /application\/pdf/ | /application\/x-shockwave-flash/ | /application\/x-java-applet/ | + /application\/jar/ | /video\/mp4/ &redef; ## The malware hash registry runs each malware sample through several A/V engines. From 1a60fae41c057bb150604d53fa6a15ed3bf2b629 Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Thu, 11 Jul 2013 11:28:55 -0500 Subject: [PATCH 065/118] Clean up queued but unused file_over_new_connections event args. --- src/file_analysis/File.cc | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/file_analysis/File.cc b/src/file_analysis/File.cc index 9a06fa3db9..7189d90932 100644 --- a/src/file_analysis/File.cc +++ b/src/file_analysis/File.cc @@ -100,7 +100,12 @@ File::~File() { DBG_LOG(DBG_FILE_ANALYSIS, "Destroying File object %s", id.c_str()); Unref(val); - assert(fonc_queue.empty()); + // Queue may not be empty in the case where only content gaps were seen. + while ( ! fonc_queue.empty() ) + { + delete_vals(fonc_queue.front().second); + fonc_queue.pop(); + } } void File::UpdateLastActivityTime() From e01678d132a7fcb90c45701d110733bcc6ab84e4 Mon Sep 17 00:00:00 2001 From: Bernhard Amann Date: Fri, 12 Jul 2013 21:09:13 +0200 Subject: [PATCH 066/118] yep, freebsd still needs this fix --- src/3rdparty/sqlite3.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/3rdparty/sqlite3.c b/src/3rdparty/sqlite3.c index deef460899..8d473d32b7 100644 --- a/src/3rdparty/sqlite3.c +++ b/src/3rdparty/sqlite3.c @@ -23442,6 +23442,9 @@ static int posixFchown(int fd, uid_t uid, gid_t gid){ /* Forward reference */ static int openDirectory(const char*, int*); +/* fix compile on FreeBSD, not sure why needed... */ +int fchmod(int, mode_t); + /* ** Many system calls are accessed through pointer-to-functions so that ** they may be overridden at runtime to facilitate fault injection during From b14f5a853eb67a5e312bc612a062889b594d1a58 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Fri, 12 Jul 2013 16:06:40 -0400 Subject: [PATCH 067/118] Added mime types to http.log --- scripts/base/protocols/http/entities.bro | 54 +++++++++++++++++++++--- scripts/base/protocols/http/files.bro | 19 --------- 2 files changed, 47 insertions(+), 26 deletions(-) diff --git a/scripts/base/protocols/http/entities.bro b/scripts/base/protocols/http/entities.bro index fc8ab753ae..dcddf6fc4f 100644 --- a/scripts/base/protocols/http/entities.bro +++ b/scripts/base/protocols/http/entities.bro @@ -9,14 +9,23 @@ module HTTP; export { type Entity: record { - ## Depth of the entity if multiple entities are sent in a single transaction. - depth: count &default=0; - ## Filename for the entity if discovered from a header. filename: string &optional; }; redef record Info += { + ## An ordered vector of file unique IDs. + orig_fuids: vector of string &log &optional; + + ## An ordered vector of mime types. + orig_mime_types: vector of string &log &optional; + + ## An ordered vector of file unique IDs. + resp_fuids: vector of string &log &optional; + + ## An ordered vector of mime types. + resp_mime_types: vector of string &log &optional; + ## The current entity being seen. 
entity: Entity &optional; @@ -36,7 +45,7 @@ event http_begin_entity(c: connection, is_orig: bool) &priority=10 else ++c$http$resp_mime_depth; - c$http$entity = Entity($depth = is_orig ? c$http$orig_mime_depth : c$http$resp_mime_depth); + c$http$entity = Entity(); } event http_header(c: connection, is_orig: bool, name: string, value: string) &priority=3 @@ -55,12 +64,43 @@ event http_header(c: connection, is_orig: bool, name: string, value: string) &pr event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priority=5 { - if ( f$source == "HTTP" && c$http?$entity ) + if ( f$source == "HTTP" && c?$http ) { - f$info$depth = c$http$entity$depth; - if ( c$http$entity?$filename ) + if ( c$http?$entity && c$http$entity?$filename ) f$info$filename = c$http$entity$filename; + + if ( f$is_orig ) + { + if ( ! c$http?$resp_mime_types ) + c$http$resp_fuids = string_vec(f$id); + else + c$http$orig_fuids[|c$http$orig_fuids|] = f$id; + + if ( f?$mime_type ) + { + if ( ! c$http?$orig_mime_types ) + c$http$orig_mime_types = string_vec(f$mime_type); + else + c$http$orig_mime_types[|c$http$orig_mime_types|] = f$mime_type; + } + } + else + { + if ( ! c$http?$resp_mime_types ) + c$http$resp_fuids = string_vec(f$id); + else + c$http$resp_fuids[|c$http$resp_fuids|] = f$id; + + if ( f?$mime_type ) + { + if ( ! c$http?$resp_mime_types ) + c$http$resp_mime_types = string_vec(f$mime_type); + else + c$http$resp_mime_types[|c$http$resp_mime_types|] = f$mime_type; + } + } } + } event http_end_entity(c: connection, is_orig: bool) &priority=5 diff --git a/scripts/base/protocols/http/files.bro b/scripts/base/protocols/http/files.bro index e45ff8cadb..09324b5f45 100644 --- a/scripts/base/protocols/http/files.bro +++ b/scripts/base/protocols/http/files.bro @@ -6,14 +6,6 @@ module HTTP; export { - redef record Info += { - ## An ordered vector of file unique IDs seen sent by the originator (client). - orig_fuids: vector of string &log &default=string_vec(); - - ## An ordered vector of file unique IDs seen sent by the responder (server). - resp_fuids: vector of string &log &default=string_vec(); - }; - ## Default file handle provider for HTTP. global get_file_handle: function(c: connection, is_orig: bool): string; } @@ -39,14 +31,3 @@ event bro_init() &priority=5 { Files::register_protocol(Analyzer::ANALYZER_HTTP, HTTP::get_file_handle); } - -event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priority=5 - { - if ( c?$http ) - { - if ( f$is_orig ) - c$http$orig_fuids[|c$http$orig_fuids|] = f$id; - else - c$http$resp_fuids[|c$http$resp_fuids|] = f$id; - } - } From 4dd4c5344e071cf2f9996852369fa2a5a90909bd Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Fri, 12 Jul 2013 16:12:26 -0400 Subject: [PATCH 068/118] Fix a bug where orig file information in http wasn't working right. --- scripts/base/protocols/http/entities.bro | 31 ++++++++++++------------ 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/scripts/base/protocols/http/entities.bro b/scripts/base/protocols/http/entities.bro index dcddf6fc4f..e9376a0c0c 100644 --- a/scripts/base/protocols/http/entities.bro +++ b/scripts/base/protocols/http/entities.bro @@ -15,24 +15,23 @@ export { redef record Info += { ## An ordered vector of file unique IDs. - orig_fuids: vector of string &log &optional; + orig_fuids: vector of string &log &optional; ## An ordered vector of mime types. orig_mime_types: vector of string &log &optional; ## An ordered vector of file unique IDs. 
- resp_fuids: vector of string &log &optional; + resp_fuids: vector of string &log &optional; ## An ordered vector of mime types. resp_mime_types: vector of string &log &optional; - ## The current entity being seen. - entity: Entity &optional; - + ## The current entity. + current_entity: Entity &optional; ## Current number of MIME entities in the HTTP request message body. - orig_mime_depth: count &default=0; + orig_mime_depth: count &default=0; ## Current number of MIME entities in the HTTP response message body. - resp_mime_depth: count &default=0; + resp_mime_depth: count &default=0; }; } @@ -45,7 +44,7 @@ event http_begin_entity(c: connection, is_orig: bool) &priority=10 else ++c$http$resp_mime_depth; - c$http$entity = Entity(); + c$http$current_entity = Entity(); } event http_header(c: connection, is_orig: bool, name: string, value: string) &priority=3 @@ -53,12 +52,12 @@ event http_header(c: connection, is_orig: bool, name: string, value: string) &pr if ( name == "CONTENT-DISPOSITION" && /[fF][iI][lL][eE][nN][aA][mM][eE]/ in value ) { - c$http$entity$filename = extract_filename_from_content_disposition(value); + c$http$current_entity$filename = extract_filename_from_content_disposition(value); } else if ( name == "CONTENT-TYPE" && /[nN][aA][mM][eE][:blank:]*=/ in value ) { - c$http$entity$filename = extract_filename_from_content_disposition(value); + c$http$current_entity$filename = extract_filename_from_content_disposition(value); } } @@ -66,13 +65,13 @@ event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priori { if ( f$source == "HTTP" && c?$http ) { - if ( c$http?$entity && c$http$entity?$filename ) - f$info$filename = c$http$entity$filename; + if ( c$http?$current_entity && c$http$current_entity?$filename ) + f$info$filename = c$http$current_entity$filename; if ( f$is_orig ) { - if ( ! c$http?$resp_mime_types ) - c$http$resp_fuids = string_vec(f$id); + if ( ! c$http?$orig_mime_types ) + c$http$orig_fuids = string_vec(f$id); else c$http$orig_fuids[|c$http$orig_fuids|] = f$id; @@ -105,6 +104,6 @@ event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priori event http_end_entity(c: connection, is_orig: bool) &priority=5 { - if ( c?$http && c$http?$entity ) - delete c$http$entity; + if ( c?$http && c$http?$current_entity ) + delete c$http$current_entity; } From 58290d6fc0436677df760792a9cda9b0c99def11 Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Sun, 14 Jul 2013 08:42:35 -0700 Subject: [PATCH 069/118] Updating NEWS. --- NEWS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS b/NEWS index b5fea869e4..1fce6b1d9d 100644 --- a/NEWS +++ b/NEWS @@ -189,6 +189,8 @@ Changed Functionality - PacketFilter::all_packets has been replaced with PacketFilter::enable_auto_protocol_capture_filters. +- We removed the BitTorrent DPD signatures pending further updates to + that analyzer. Bro 2.1 ------- From 7427ce511b78c8ae5656762ad8c229976dd33fd3 Mon Sep 17 00:00:00 2001 From: Bernhard Amann Date: Mon, 15 Jul 2013 13:50:40 -0700 Subject: [PATCH 070/118] Small raw reader fixes * crash when accessing nonexistant file. * memory leak when reading from file. Addresses #1038. 
--- src/input/readers/Raw.cc | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/src/input/readers/Raw.cc b/src/input/readers/Raw.cc index 46cb3656a3..98f1dfcab6 100644 --- a/src/input/readers/Raw.cc +++ b/src/input/readers/Raw.cc @@ -55,6 +55,13 @@ void Raw::DoClose() if ( file != 0 ) CloseInput(); + if ( buf != 0 ) + { + // we still have output that has not been flushed. Throw away. + delete buf; + buf = 0; + } + if ( execute && childpid > 0 && kill(childpid, 0) == 0 ) { // kill child process @@ -157,13 +164,13 @@ bool Raw::OpenInput() else { file = fopen(fname.c_str(), "r"); - fcntl(fileno(file), F_SETFD, FD_CLOEXEC); if ( ! file ) { Error(Fmt("Init: cannot open %s", fname.c_str())); return false; } } + fcntl(fileno(file), F_SETFD, FD_CLOEXEC); return true; } @@ -322,12 +329,14 @@ int64_t Raw::GetLine(FILE* arg_file) // but first check if we encountered the file end - because if we did this was it. if ( feof(arg_file) != 0 ) { - outbuf = buf; - buf = 0; if ( pos == 0 ) return -1; // signal EOF - and that we had no more data. else + { + outbuf = buf; + buf = 0; return pos; + } } repeats++; @@ -342,15 +351,13 @@ int64_t Raw::GetLine(FILE* arg_file) { outbuf = buf; buf = 0; - buf = new char[block_size]; - if ( found < pos ) { // we have leftovers. copy them into the buffer for the next line buf = new char[block_size]; memcpy(buf, outbuf + found + sep_length, pos - found - sep_length); - bufpos = pos - found - sep_length; + bufpos = pos - found - sep_length; } return found; @@ -368,7 +375,7 @@ int64_t Raw::GetLine(FILE* arg_file) return -3; } - InternalError("Internal control flow execution"); + InternalError("Internal control flow execution error in raw reader"); assert(false); } @@ -461,7 +468,7 @@ bool Raw::DoUpdate() if ( length == -3 ) return false; - else if ( length == -2 || length == -1 ) + else if ( length == -2 || length == -1 ) // no data ready or eof break; From 0bfdcc1fbca326e563ea4a6db5e69be05f2fbed5 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Tue, 16 Jul 2013 12:01:50 -0400 Subject: [PATCH 071/118] Added protocol description functions that provide a super compressed log representation. --- scripts/base/frameworks/analyzer/main.bro | 12 +++++ scripts/base/frameworks/files/main.bro | 50 ++++++++++++++++---- scripts/base/frameworks/notice/main.bro | 19 ++++++-- scripts/base/protocols/ftp/__load__.bro | 1 + scripts/base/protocols/ftp/files.bro | 21 +++++++- scripts/base/protocols/ftp/main.bro | 50 +++++++------------- scripts/base/protocols/http/files.bro | 21 +++++++- scripts/base/protocols/http/utils.bro | 8 ++++ scripts/base/protocols/irc/file-analysis.bro | 23 --------- scripts/base/protocols/irc/files.bro | 3 +- scripts/base/protocols/smtp/files.bro | 21 +++++++- scripts/base/protocols/smtp/main.bro | 31 +++++++++++- src/analyzer/analyzer.bif | 5 ++ 13 files changed, 190 insertions(+), 75 deletions(-) delete mode 100644 scripts/base/protocols/irc/file-analysis.bro diff --git a/scripts/base/frameworks/analyzer/main.bro b/scripts/base/frameworks/analyzer/main.bro index c4ee5c943b..e266eb8c7a 100644 --- a/scripts/base/frameworks/analyzer/main.bro +++ b/scripts/base/frameworks/analyzer/main.bro @@ -81,6 +81,13 @@ export { ## Returns: The analyzer name corresponding to the tag. global name: function(tag: Analyzer::Tag) : string; + ## Translates an analyzer's name to a tag enum value. + ## + ## name: The analyzer name. + ## + ## Returns: The analyzer tag corresponding to the name. 
+ global get_tag: function(name: string): Analyzer::Tag; + ## Schedules an analyzer for a future connection originating from a given IP ## address and port. ## @@ -187,6 +194,11 @@ function name(atype: Analyzer::Tag) : string return __name(atype); } +function get_tag(name: string): Analyzer::Tag + { + return __tag(name); + } + function schedule_analyzer(orig: addr, resp: addr, resp_p: port, analyzer: Analyzer::Tag, tout: interval) : bool { diff --git a/scripts/base/frameworks/files/main.bro b/scripts/base/frameworks/files/main.bro index 8dd07fcb53..cc92932bbf 100644 --- a/scripts/base/frameworks/files/main.bro +++ b/scripts/base/frameworks/files/main.bro @@ -2,6 +2,7 @@ ##! any network protocol over which they're transported. @load base/bif/file_analysis.bif +@load base/frameworks/analyzer @load base/frameworks/logging @load base/utils/site @@ -173,17 +174,36 @@ export { ## Returns: The analyzer name corresponding to the tag. global analyzer_name: function(tag: Files::Tag): string; + ## Provides a text description regarding metadata of the file. + ## For example, with HTTP it would return a URL. + ## + ## f: The file to be described. + ## + ## Returns a text description regarding metadata of the file. + global describe: function(f: fa_file): string; + + type ProtoRegistration: record { + ## A callback to generate a file handle on demand when + ## one is needed by the core. + get_file_handle: function(c: connection, is_orig: bool): string; + + ## A callback to "describe" a file. In the case of an HTTP + ## transfer the most obvious description would be the URL. + ## It's like an extremely compressed version of the normal log. + describe: function(f: fa_file): string + &default=function(f: fa_file): string { return ""; }; + }; + ## Register callbacks for protocols that work with the Files framework. ## The callbacks must uniquely identify a file and each protocol can ## only have a single callback registered for it. ## ## tag: Tag for the protocol analyzer having a callback being registered. ## - ## callback: Function that can generate a file handle for the protocol analyzer - ## defined previously. + ## reg: A :bro:see:`ProtoRegistration` record. ## ## Returns: true if the protocol being registered was not previously registered. - global register_protocol: function(tag: Files::Tag, callback: function(c: connection, is_orig: bool): string): bool; + global register_protocol: function(tag: Analyzer::Tag, reg: ProtoRegistration): bool; ## Register a callback for file analyzers to use if they need to do some manipulation ## when they are being added to a file before the core code takes over. This is @@ -210,8 +230,7 @@ redef record AnalyzerArgs += { }; # Store the callbacks for protocol analyzers that have files. 
-global registered_protocols: table[Files::Tag] of function(c: connection, is_orig: bool): string = table() - &default=function(c: connection, is_orig: bool): string { return cat(c$uid, is_orig); }; +global registered_protocols: table[Analyzer::Tag] of ProtoRegistration = table(); global analyzer_add_callbacks: table[Files::Tag] of function(f: fa_file, args: AnalyzerArgs) = table(); @@ -321,15 +340,28 @@ event file_state_remove(f: fa_file) &priority=-10 Log::write(Files::LOG, f$info); } -function register_protocol(tag: Files::Tag, callback: function(c: connection, is_orig: bool): string): bool +function register_protocol(tag: Analyzer::Tag, reg: ProtoRegistration): bool { local result = (tag !in registered_protocols); - registered_protocols[tag] = callback; + registered_protocols[tag] = reg; return result; } -event get_file_handle(tag: Files::Tag, c: connection, is_orig: bool) &priority=5 +function describe(f: fa_file): string { + local tag = Analyzer::get_tag(f$source); + if ( tag !in registered_protocols ) + return ""; + local handler = registered_protocols[tag]; - set_file_handle(handler(c, is_orig)); + return handler$describe(f); + } + +event get_file_handle(tag: Analyzer::Tag, c: connection, is_orig: bool) &priority=5 + { + if ( tag !in registered_protocols ) + return; + + local handler = registered_protocols[tag]; + set_file_handle(handler$get_file_handle(c, is_orig)); } diff --git a/scripts/base/frameworks/notice/main.bro b/scripts/base/frameworks/notice/main.bro index f47ed79940..5bd01e0982 100644 --- a/scripts/base/frameworks/notice/main.bro +++ b/scripts/base/frameworks/notice/main.bro @@ -79,7 +79,13 @@ export { ## A mime type if the notice is related to a file. If the $f field ## is provided, this will be automatically filled out. - mime_type: string &log &optional; + file_mime_type: string &log &optional; + + ## Frequently files can be "described" to give a bit more context. + ## This field will typically be automatically filled out from an + ## fa_file record. For example, if a notice was related to a + ## file over HTTP, the URL of the request would be shown. + file_desc: string &log &optional; ## The transport protocol. Filled automatically when either conn, iconn ## or p is specified. @@ -477,9 +483,13 @@ function apply_policy(n: Notice::Info) { if ( ! n?$fuid ) n$fuid = n$f$id; - if ( ! n?$mime_type && n$f?$mime_type ) - n$mime_type = n$f$mime_type; - if ( |n$f$conns| == 1 ) + + if ( ! n?$file_mime_type && n$f?$mime_type ) + n$file_mime_type = n$f$mime_type; + + n$file_desc = Files::describe(n$f); + + if ( n$f?$conns && |n$f$conns| == 1 ) { for ( id in n$f$conns ) n$conn = n$f$conns[id]; @@ -490,6 +500,7 @@ function apply_policy(n: Notice::Info) { if ( ! n?$id ) n$id = n$conn$id; + if ( ! n?$uid ) n$uid = n$conn$uid; } diff --git a/scripts/base/protocols/ftp/__load__.bro b/scripts/base/protocols/ftp/__load__.bro index bc68f61cea..ebb09e702c 100644 --- a/scripts/base/protocols/ftp/__load__.bro +++ b/scripts/base/protocols/ftp/__load__.bro @@ -1,5 +1,6 @@ @load ./utils-commands @load ./main +@load ./utils @load ./files @load ./gridftp diff --git a/scripts/base/protocols/ftp/files.bro b/scripts/base/protocols/ftp/files.bro index c68717c8a2..1d7b7670f4 100644 --- a/scripts/base/protocols/ftp/files.bro +++ b/scripts/base/protocols/ftp/files.bro @@ -12,6 +12,9 @@ export { ## Default file handle provider for FTP. global get_file_handle: function(c: connection, is_orig: bool): string; + + ## Describe the file being transferred. 
+ global describe_file: function(f: fa_file): string; } function get_file_handle(c: connection, is_orig: bool): string @@ -22,9 +25,25 @@ function get_file_handle(c: connection, is_orig: bool): string return cat(Analyzer::ANALYZER_FTP_DATA, c$start_time, c$id, is_orig); } +function describe_file(f: fa_file): string + { + # This shouldn't be needed, but just in case... + if ( f$source != "FTP" ) + return ""; + + for ( cid in f$conns ) + { + if ( f$conns[cid]?$ftp ) + return FTP::describe(f$conns[cid]$ftp); + } + return ""; + } + event bro_init() &priority=5 { - Files::register_protocol(Analyzer::ANALYZER_FTP_DATA, FTP::get_file_handle); + Files::register_protocol(Analyzer::ANALYZER_FTP_DATA, + [$get_file_handle = FTP::get_file_handle, + $describe = FTP::describe_file]); } diff --git a/scripts/base/protocols/ftp/main.bro b/scripts/base/protocols/ftp/main.bro index 7bf9d6cc4c..f525c7792b 100644 --- a/scripts/base/protocols/ftp/main.bro +++ b/scripts/base/protocols/ftp/main.bro @@ -63,8 +63,6 @@ export { reply_code: count &log &optional; ## Reply message from the server in response to the command. reply_msg: string &log &optional; - ## Arbitrary tags that may indicate a particular attribute of this command. - tags: set[string] &log; ## Expected FTP data channel. data_channel: ExpectedDataChannel &log &optional; @@ -171,37 +169,22 @@ function set_ftp_session(c: connection) function ftp_message(s: Info) { - # If it either has a tag associated with it (something detected) - # or it's a deliberately logged command. - if ( |s$tags| > 0 || (s?$cmdarg && s$cmdarg$cmd in logged_commands) ) + s$ts=s$cmdarg$ts; + s$command=s$cmdarg$cmd; + s$arg=s$cmdarg$arg; + if ( s$arg == "" ) + delete s$arg; + + if ( s?$password && + ! s$capture_password && + to_lower(s$user) !in guest_ids ) { - if ( s?$password && - ! s$capture_password && - to_lower(s$user) !in guest_ids ) - { - s$password = ""; - } - - local arg = s$cmdarg$arg; - if ( s$cmdarg$cmd in file_cmds ) - { - local comp_path = build_path_compressed(s$cwd, arg); - if ( comp_path[0] != "/" ) - comp_path = cat("/", comp_path); - - arg = fmt("ftp://%s%s", addr_to_uri(s$id$resp_h), comp_path); - } - - s$ts=s$cmdarg$ts; - s$command=s$cmdarg$cmd; - if ( arg == "" ) - delete s$arg; - else - s$arg=arg; - - Log::write(FTP::LOG, s); + s$password = ""; } + if ( s?$cmdarg && s$command in logged_commands) + Log::write(FTP::LOG, s); + # The MIME and file_size fields are specific to file transfer commands # and may not be used in all commands so they need reset to "blank" # values after logging. @@ -209,8 +192,6 @@ function ftp_message(s: Info) delete s$file_size; # Same with data channel. delete s$data_channel; - # Tags are cleared everytime too. 
- s$tags = set(); } function add_expected_data_channel(s: Info, chan: ExpectedDataChannel) @@ -218,8 +199,9 @@ function add_expected_data_channel(s: Info, chan: ExpectedDataChannel) s$passive = chan$passive; s$data_channel = chan; ftp_data_expected[chan$resp_h, chan$resp_p] = s; - Analyzer::schedule_analyzer(chan$orig_h, chan$resp_h, chan$resp_p, Analyzer::ANALYZER_FTP_DATA, - 5mins); + Analyzer::schedule_analyzer(chan$orig_h, chan$resp_h, chan$resp_p, + Analyzer::ANALYZER_FTP_DATA, + 5mins); } event ftp_request(c: connection, command: string, arg: string) &priority=5 diff --git a/scripts/base/protocols/http/files.bro b/scripts/base/protocols/http/files.bro index 09324b5f45..fd07dc096a 100644 --- a/scripts/base/protocols/http/files.bro +++ b/scripts/base/protocols/http/files.bro @@ -8,6 +8,9 @@ module HTTP; export { ## Default file handle provider for HTTP. global get_file_handle: function(c: connection, is_orig: bool): string; + + ## Default file describer for HTTP. + global describe_file: function(f: fa_file): string; } function get_file_handle(c: connection, is_orig: bool): string @@ -27,7 +30,23 @@ function get_file_handle(c: connection, is_orig: bool): string } } +function describe_file(f: fa_file): string + { + # This shouldn't be needed, but just in case... + if ( f$source != "HTTP" ) + return ""; + + for ( cid in f$conns ) + { + if ( f$conns[cid]?$http ) + return build_url_http(f$conns[cid]$http); + } + return ""; + } + event bro_init() &priority=5 { - Files::register_protocol(Analyzer::ANALYZER_HTTP, HTTP::get_file_handle); + Files::register_protocol(Analyzer::ANALYZER_HTTP, + [$get_file_handle = HTTP::get_file_handle, + $describe = HTTP::describe_file]); } diff --git a/scripts/base/protocols/http/utils.bro b/scripts/base/protocols/http/utils.bro index a74a2fe696..fe8c076780 100644 --- a/scripts/base/protocols/http/utils.bro +++ b/scripts/base/protocols/http/utils.bro @@ -32,6 +32,9 @@ export { ## ## Returns: A URL prefixed with "http://". global build_url_http: function(rec: Info): string; + + ## Create an extremely shortened representation of a log line. + global describe: function(rec: Info): string; } @@ -62,3 +65,8 @@ function build_url_http(rec: Info): string { return fmt("http://%s", build_url(rec)); } + +function describe(rec: Info): string + { + return build_url_http(rec); + } diff --git a/scripts/base/protocols/irc/file-analysis.bro b/scripts/base/protocols/irc/file-analysis.bro deleted file mode 100644 index f2e84fbc22..0000000000 --- a/scripts/base/protocols/irc/file-analysis.bro +++ /dev/null @@ -1,23 +0,0 @@ -@load ./dcc-send -@load base/utils/conn-ids -@load base/frameworks/files - -module IRC; - -export { - ## Default file handle provider for IRC. 
- global get_file_handle: function(c: connection, is_orig: bool): string; -} - -function get_file_handle(c: connection, is_orig: bool): string - { - if ( [c$id$resp_h, c$id$resp_p] !in dcc_expected_transfers ) - return ""; - - return cat(ANALYZER_IRC_DATA, c$start_time, c$id, is_orig); - } - -event bro_init() &priority=5 - { - Files::register_protocol(ANALYZER_IRC_DATA, IRC::get_file_handle); - } diff --git a/scripts/base/protocols/irc/files.bro b/scripts/base/protocols/irc/files.bro index 8708270bfd..a6321d3f2f 100644 --- a/scripts/base/protocols/irc/files.bro +++ b/scripts/base/protocols/irc/files.bro @@ -24,7 +24,8 @@ function get_file_handle(c: connection, is_orig: bool): string event bro_init() &priority=5 { - Files::register_protocol(Analyzer::ANALYZER_IRC_DATA, IRC::get_file_handle); + Files::register_protocol(Analyzer::ANALYZER_IRC_DATA, + [$get_file_handle = IRC::get_file_handle]); } event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priority=5 diff --git a/scripts/base/protocols/smtp/files.bro b/scripts/base/protocols/smtp/files.bro index 1cf9ec01e1..f9ae2ab05f 100644 --- a/scripts/base/protocols/smtp/files.bro +++ b/scripts/base/protocols/smtp/files.bro @@ -14,6 +14,9 @@ export { ## Default file handle provider for SMTP. global get_file_handle: function(c: connection, is_orig: bool): string; + + ## Default file describer for SMTP. + global describe_file: function(f: fa_file): string; } function get_file_handle(c: connection, is_orig: bool): string @@ -22,9 +25,25 @@ function get_file_handle(c: connection, is_orig: bool): string c$smtp_state$mime_depth); } +function describe_file(f: fa_file): string + { + # This shouldn't be needed, but just in case... + if ( f$source != "SMTP" ) + return ""; + + for ( cid in f$conns ) + { + local c = f$conns[cid]; + return SMTP::describe(c$smtp); + } + return ""; + } + event bro_init() &priority=5 { - Files::register_protocol(Analyzer::ANALYZER_SMTP, SMTP::get_file_handle); + Files::register_protocol(Analyzer::ANALYZER_SMTP, + [$get_file_handle = SMTP::get_file_handle, + $describe = SMTP::describe_file]); } event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priority=5 diff --git a/scripts/base/protocols/smtp/main.bro b/scripts/base/protocols/smtp/main.bro index d53128b06c..702cb9fc0e 100644 --- a/scripts/base/protocols/smtp/main.bro +++ b/scripts/base/protocols/smtp/main.bro @@ -72,7 +72,10 @@ export { ## ALL_HOSTS - always capture the entire path. ## NO_HOSTS - never capture the path. const mail_path_capture = ALL_HOSTS &redef; - + + ## Create an extremely shortened representation of a log line. + global describe: function(rec: Info): string; + global log_smtp: event(rec: Info); } @@ -268,3 +271,29 @@ event connection_state_remove(c: connection) &priority=-5 if ( c?$smtp ) smtp_message(c); } + +function describe(rec: Info): string + { + if ( rec?$mailfrom && rec?$rcptto ) + { + local one_to = ""; + for ( to in rec$rcptto ) + { + one_to = to; + break; + } + local abbrev_subject = ""; + if ( rec?$subject ) + { + if ( |rec$subject| > 20 ) + { + abbrev_subject = rec$subject[0:20] + "..."; + } + } + + return fmt("%s -> %s%s%s", rec$mailfrom, one_to, + (|rec$rcptto|>1 ? fmt(" (plus %d others)", |rec$rcptto|-1) : ""), + (abbrev_subject != "" ? 
fmt(": %s", abbrev_subject) : "")); + } + return ""; + } \ No newline at end of file diff --git a/src/analyzer/analyzer.bif b/src/analyzer/analyzer.bif index 7f3cc6ed94..8b5a85956c 100644 --- a/src/analyzer/analyzer.bif +++ b/src/analyzer/analyzer.bif @@ -43,3 +43,8 @@ function __name%(atype: Analyzer::Tag%) : string %{ return new StringVal(analyzer_mgr->GetAnalyzerName(atype)); %} + +function __tag%(name: string%) : Analyzer::Tag + %{ + return new Val(analyzer_mgr->GetAnalyzerTag(name->CheckString()), TYPE_ENUM); + %} From eb7ceb3e9ac4bc49f7a337bcc56046350aaa89d5 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Tue, 16 Jul 2013 12:07:33 -0400 Subject: [PATCH 072/118] Forgot a file. --- scripts/base/protocols/ftp/utils.bro | 47 ++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 scripts/base/protocols/ftp/utils.bro diff --git a/scripts/base/protocols/ftp/utils.bro b/scripts/base/protocols/ftp/utils.bro new file mode 100644 index 0000000000..629b87e5a8 --- /dev/null +++ b/scripts/base/protocols/ftp/utils.bro @@ -0,0 +1,47 @@ +##! Utilities specific for FTP processing. + +@load ./main +@load base/utils/addrs + +module FTP; + +export { + ## Creates a URL from an :bro:type:`FTP::Info` record. + ## + ## rec: An :bro:type:`FTP::Info` record. + ## + ## Returns: A URL, not prefixed by "ftp://". + global build_url: function(rec: Info): string; + + ## Creates a URL from an :bro:type:`FTP::Info` record. + ## + ## rec: An :bro:type:`FTP::Info` record. + ## + ## Returns: A URL prefixed with "ftp://". + global build_url_ftp: function(rec: Info): string; + + ## Create an extremely shortened representation of a log line. + global describe: function(rec: Info): string; +} + +function build_url(rec: Info): string + { + if ( !rec?$arg ) + return ""; + + local comp_path = build_path_compressed(rec$cwd, rec$arg); + if ( comp_path[0] != "/" ) + comp_path = cat("/", comp_path); + + return fmt("%s%s", addr_to_uri(rec$id$resp_h), comp_path); + } + +function build_url_ftp(rec: Info): string + { + return fmt("ftp://%s", build_url(rec)); + } + +function describe(rec: Info): string + { + return build_url_ftp(rec); + } \ No newline at end of file From 57b05a2989d32e87147686b39480240e5162e405 Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Wed, 17 Jul 2013 17:30:35 -0700 Subject: [PATCH 073/118] Small raw reader tweaks that I forgot to commit earlier. --- src/input/readers/Raw.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/input/readers/Raw.cc b/src/input/readers/Raw.cc index 98f1dfcab6..2820923a25 100644 --- a/src/input/readers/Raw.cc +++ b/src/input/readers/Raw.cc @@ -55,7 +55,7 @@ void Raw::DoClose() if ( file != 0 ) CloseInput(); - if ( buf != 0 ) + if ( buf != 0 ) { // we still have output that has not been flushed. Throw away. delete buf; @@ -169,8 +169,8 @@ bool Raw::OpenInput() Error(Fmt("Init: cannot open %s", fname.c_str())); return false; } - } fcntl(fileno(file), F_SETFD, FD_CLOEXEC); + } return true; } @@ -468,7 +468,7 @@ bool Raw::DoUpdate() if ( length == -3 ) return false; - else if ( length == -2 || length == -1 ) + else if ( length == -2 || length == -1 ) // no data ready or eof break; From d8801bb9c4bc8c898a6c0b51ddb7a647076237bc Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Wed, 17 Jul 2013 17:31:16 -0700 Subject: [PATCH 074/118] Canonifying internal order for plugins and their components to make it deterministic. 
--- aux/btest | 2 +- src/analyzer/Component.h | 2 +- src/file_analysis/Component.h | 2 +- src/plugin/Component.h | 6 +++++ src/plugin/Manager.cc | 9 +++++++ src/plugin/Plugin.cc | 9 +++++++ .../Baseline/core.print-bpf-filters/conn.log | 4 ++-- .../Baseline/core.print-bpf-filters/output | 24 +++++++++---------- 8 files changed, 41 insertions(+), 17 deletions(-) diff --git a/aux/btest b/aux/btest index c2e73c9e1e..b1d4faf239 160000 --- a/aux/btest +++ b/aux/btest @@ -1 +1 @@ -Subproject commit c2e73c9e1efed6bfdf2d977d716c97773c39492e +Subproject commit b1d4faf23900d4753e93a68abbba45ae3bf96d03 diff --git a/src/analyzer/Component.h b/src/analyzer/Component.h index f3d91c7f90..9e12ed347e 100644 --- a/src/analyzer/Component.h +++ b/src/analyzer/Component.h @@ -72,7 +72,7 @@ public: * from what's passed to the constructor but upper-cased and * canonified to allow being part of a script-level ID. */ - const char* Name() const { return name; } + virtual const char* Name() const { return name; } /** * Returns a canonocalized version of the analyzer's name. The diff --git a/src/file_analysis/Component.h b/src/file_analysis/Component.h index 8b79436991..3cdc69efdf 100644 --- a/src/file_analysis/Component.h +++ b/src/file_analysis/Component.h @@ -64,7 +64,7 @@ public: * from what's passed to the constructor but upper-cased and * canonified to allow being part of a script-level ID. */ - const char* Name() const { return name; } + virtual const char* Name() const { return name; } /** * Returns a canonocalized version of the analyzer's name. The diff --git a/src/plugin/Component.h b/src/plugin/Component.h index 4ac448e466..ad02dc7e4b 100644 --- a/src/plugin/Component.h +++ b/src/plugin/Component.h @@ -45,6 +45,12 @@ public: */ component::Type Type() const; + /** + * Returns a descriptive name for the analyzer. This name must be + * unique across all components of the same type. + */ + virtual const char* Name() const = 0; + /** * Returns a textual representation of the component. The default * version just output the type. Derived version should call the diff --git a/src/plugin/Manager.cc b/src/plugin/Manager.cc index 93ed3f2b97..67f4dea2bd 100644 --- a/src/plugin/Manager.cc +++ b/src/plugin/Manager.cc @@ -30,9 +30,18 @@ bool Manager::LoadPluginsFrom(const std::string& dir) return false; } +static bool plugin_cmp(const Plugin* a, const Plugin* b) + { + return a->Name() < b->Name(); + } + bool Manager::RegisterPlugin(Plugin *plugin) { Manager::PluginsInternal()->push_back(plugin); + + // Sort plugins by name to make sure we have a deterministic order. + PluginsInternal()->sort(plugin_cmp); + return true; } diff --git a/src/plugin/Plugin.cc b/src/plugin/Plugin.cc index 084c49f51e..eaac8a3b25 100644 --- a/src/plugin/Plugin.cc +++ b/src/plugin/Plugin.cc @@ -156,9 +156,18 @@ Plugin::component_list Plugin::Components() const return components; } +static bool component_cmp(const Component* a, const Component* b) + { + return a->Name() < b->Name(); + } + void Plugin::AddComponent(Component* c) { components.push_back(c); + + // Sort components by name to make sure we have a deterministic + // order. 
+ components.sort(component_cmp); } void Plugin::AddBifInitFunction(bif_init_func c) diff --git a/testing/btest/Baseline/core.print-bpf-filters/conn.log b/testing/btest/Baseline/core.print-bpf-filters/conn.log index 745673c027..166286203e 100644 --- a/testing/btest/Baseline/core.print-bpf-filters/conn.log +++ b/testing/btest/Baseline/core.print-bpf-filters/conn.log @@ -3,8 +3,8 @@ #empty_field (empty) #unset_field - #path conn -#open 2013-07-08-20-05-18 +#open 2013-07-18-00-18-33 #fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p proto service duration orig_bytes resp_bytes conn_state local_orig missed_bytes history orig_pkts orig_ip_bytes resp_pkts resp_ip_bytes tunnel_parents #types time string addr port addr port enum string interval count count string bool count string count count count count table[string] 1278600802.069419 UWkUyAuUGXf 10.20.80.1 50343 10.0.0.15 80 tcp - 0.004152 9 3429 SF - 0 ShADadfF 7 381 7 3801 (empty) -#close 2013-07-08-20-05-18 +#close 2013-07-18-00-18-33 diff --git a/testing/btest/Baseline/core.print-bpf-filters/output b/testing/btest/Baseline/core.print-bpf-filters/output index 8ccc04b1a7..871719bba8 100644 --- a/testing/btest/Baseline/core.print-bpf-filters/output +++ b/testing/btest/Baseline/core.print-bpf-filters/output @@ -3,38 +3,38 @@ #empty_field (empty) #unset_field - #path packet_filter -#open 2013-07-08-20-05-17 +#open 2013-07-18-00-18-33 #fields ts node filter init success #types time string string bool bool -1373313917.926565 - ip or not ip T T -#close 2013-07-08-20-05-17 +1374106713.105591 - ip or not ip T T +#close 2013-07-18-00-18-33 #separator \x09 #set_separator , #empty_field (empty) #unset_field - #path packet_filter -#open 2013-07-08-20-05-18 +#open 2013-07-18-00-18-33 #fields ts node filter init success #types time string string bool bool -1373313918.205206 - port 42 T T -#close 2013-07-08-20-05-18 +1374106713.385541 - port 42 T T +#close 2013-07-18-00-18-33 #separator \x09 #set_separator , #empty_field (empty) #unset_field - #path packet_filter -#open 2013-07-08-20-05-18 +#open 2013-07-18-00-18-33 #fields ts node filter init success #types time string string bool bool -1373313918.491383 - (vlan) and (ip or not ip) T T -#close 2013-07-08-20-05-18 +1374106713.664282 - (vlan) and (ip or not ip) T T +#close 2013-07-18-00-18-33 #separator \x09 #set_separator , #empty_field (empty) #unset_field - #path packet_filter -#open 2013-07-08-20-05-18 +#open 2013-07-18-00-18-33 #fields ts node filter init success #types time string string bool bool -1373313918.795264 - ((((((((((((udp and port 3544) or (udp and port 514)) or ((tcp and port 2811) or (tcp and port 21))) or (tcp and port 502)) or ((((tcp and port 6669) or (tcp and port 6666)) or (tcp and port 6668)) or (tcp and port 6667))) or (tcp and port 1080)) or ((udp and port 2152) or (udp and port 2123))) or ((((((((tcp and port 631) or (tcp and port 8888)) or (tcp and port 3128)) or (tcp and port 80)) or (tcp and port 1080)) or (tcp and port 8000)) or (tcp and port 81)) or (tcp and port 8080))) or (udp and port 5072)) or ((tcp and port 25) or (tcp and port 587))) or (((((((((((tcp and port 5223) or (tcp and port 585)) or (tcp and port 614)) or (tcp and port 993)) or (tcp and port 636)) or (tcp and port 989)) or (tcp and port 995)) or (tcp and port 443)) or (tcp and port 563)) or (tcp and port 990)) or (tcp and port 992))) or (((((udp and port 5355) or (tcp and port 53)) or (udp and port 5353)) or (udp and port 137)) or (udp and port 53))) or (tcp and port 22) T T -#close 2013-07-08-20-05-18 
+1374106713.957005 - ((((((((((((((((((((((tcp and port 5223) or (tcp and port 585)) or (tcp and port 614)) or (tcp and port 993)) or (tcp and port 636)) or (tcp and port 989)) or (tcp and port 995)) or (tcp and port 443)) or (tcp and port 563)) or (tcp and port 990)) or (tcp and port 992)) or ((tcp and port 2811) or (tcp and port 21))) or ((((tcp and port 6669) or (tcp and port 6666)) or (tcp and port 6668)) or (tcp and port 6667))) or ((udp and port 2152) or (udp and port 2123))) or (tcp and port 22)) or (tcp and port 1080)) or ((((((((tcp and port 631) or (tcp and port 8888)) or (tcp and port 3128)) or (tcp and port 80)) or (tcp and port 1080)) or (tcp and port 8000)) or (tcp and port 81)) or (tcp and port 8080))) or (udp and port 5072)) or ((tcp and port 25) or (tcp and port 587))) or (tcp and port 502)) or (udp and port 514)) or (((((udp and port 5355) or (tcp and port 53)) or (udp and port 5353)) or (udp and port 137)) or (udp and port 53))) or (udp and port 3544) T T +#close 2013-07-18-00-18-33 From efd343af8d0122975536308b7a98689f6def42d1 Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Wed, 17 Jul 2013 21:55:36 -0700 Subject: [PATCH 075/118] Extending external canonifier to remove fractional values from capture_loss.log. --- testing/scripts/diff-canonifier-external | 9 ++++++++- testing/scripts/diff-remove-fractions | 6 ++++++ 2 files changed, 14 insertions(+), 1 deletion(-) create mode 100755 testing/scripts/diff-remove-fractions diff --git a/testing/scripts/diff-canonifier-external b/testing/scripts/diff-canonifier-external index f4356154e4..37a51fa72f 100755 --- a/testing/scripts/diff-canonifier-external +++ b/testing/scripts/diff-canonifier-external @@ -2,10 +2,17 @@ # # Default canonifier used with the trace-based tests in testing/external/*. +addl="cat" + +if [ "$1" == "capture_loss.log" ]; then + addl="`dirname $0`/diff-remove-fractions" +fi + `dirname $0`/diff-remove-timestamps \ | `dirname $0`/diff-remove-uids \ | `dirname $0`/diff-remove-file-ids \ | `dirname $0`/diff-remove-x509-names \ | `dirname $0`/diff-canon-notice-policy \ - | `dirname $0`/diff-sort + | `dirname $0`/diff-sort \ + | eval $addl diff --git a/testing/scripts/diff-remove-fractions b/testing/scripts/diff-remove-fractions new file mode 100755 index 0000000000..975157913c --- /dev/null +++ b/testing/scripts/diff-remove-fractions @@ -0,0 +1,6 @@ +#! /usr/bin/env bash +# +# Replace fractions of double value (i.e., 3.14 -> 3.x). + +sed 's/\.[0-9]\{1,\}/.X/g' + From c373f93c4f8922c31e8676ba7aa139e594b41bcf Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Wed, 17 Jul 2013 21:57:25 -0700 Subject: [PATCH 076/118] Updating submodule(s). [nomail] --- aux/btest | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aux/btest b/aux/btest index b1d4faf239..ce366206e3 160000 --- a/aux/btest +++ b/aux/btest @@ -1 +1 @@ -Subproject commit b1d4faf23900d4753e93a68abbba45ae3bf96d03 +Subproject commit ce366206e3407e534a786ad572c342e9f9fef26b From 1e32100fed2eac8639453739c376dc070befc9c0 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Thu, 18 Jul 2013 09:24:22 -0400 Subject: [PATCH 077/118] Fixing a dns reporter message in master. 
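As an illustration (not part of the diff below): the change makes DNS::do_reply flag replies that arrive without any matching request state by raising the "dns_unmatched_reply" weird before creating a session for them. A site script could watch for that weird with the same conn_weird signature the patch uses; a minimal sketch:

    # Sketch only; the weird name matches the one raised by the patched
    # DNS::do_reply handler.
    event conn_weird(name: string, c: connection, addl: string)
        {
        if ( name == "dns_unmatched_reply" )
            print fmt("unmatched DNS reply: %s -> %s", c$id$orig_h, c$id$resp_h);
        }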
--- scripts/base/protocols/dns/main.bro | 5 +++++ .../dns.log | 11 +++++++++++ .../weird.log | 11 +++++++++++ testing/btest/Traces/dns-two-responses.trace | Bin 0 -> 1006 bytes .../base/protocols/dns/duplicate-reponses.bro | 5 +++++ 5 files changed, 32 insertions(+) create mode 100644 testing/btest/Baseline/scripts.base.protocols.dns.duplicate-reponses/dns.log create mode 100644 testing/btest/Baseline/scripts.base.protocols.dns.duplicate-reponses/weird.log create mode 100644 testing/btest/Traces/dns-two-responses.trace create mode 100644 testing/btest/scripts/base/protocols/dns/duplicate-reponses.bro diff --git a/scripts/base/protocols/dns/main.bro b/scripts/base/protocols/dns/main.bro index ea3ec016de..bf47519cd8 100644 --- a/scripts/base/protocols/dns/main.bro +++ b/scripts/base/protocols/dns/main.bro @@ -207,6 +207,11 @@ event DNS::do_reply(c: connection, msg: dns_msg, ans: dns_answer, reply: string) { if ( ans$answer_type == DNS_ANS ) { + if ( ! c?$dns ) + { + event conn_weird("dns_unmatched_reply", c, ""); + hook set_session(c, msg, F); + } c$dns$AA = msg$AA; c$dns$RA = msg$RA; diff --git a/testing/btest/Baseline/scripts.base.protocols.dns.duplicate-reponses/dns.log b/testing/btest/Baseline/scripts.base.protocols.dns.duplicate-reponses/dns.log new file mode 100644 index 0000000000..ca071ee8ef --- /dev/null +++ b/testing/btest/Baseline/scripts.base.protocols.dns.duplicate-reponses/dns.log @@ -0,0 +1,11 @@ +#separator \x09 +#set_separator , +#empty_field (empty) +#unset_field - +#path dns +#open 2013-07-18-13-21-52 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p proto trans_id query qclass qclass_name qtype qtype_name rcode rcode_name AA TC RD RA Z answers TTLs rejected +#types time string addr port addr port enum count string count string count string count string bool bool bool bool count vector[string] vector[interval] bool +1363716396.798072 UWkUyAuUGXf 55.247.223.174 27285 222.195.43.124 53 udp 21140 www.cmu.edu 1 C_INTERNET 1 A 0 NOERROR T F F F 1 www-cmu.andrew.cmu.edu,www-cmu-2.andrew.cmu.edu,128.2.10.163,www-cmu.andrew.cmu.edu 86400.000000,5.000000,21600.000000,86400.000000 F +1363716396.798374 UWkUyAuUGXf 55.247.223.174 27285 222.195.43.124 53 udp 21140 - - - - - 0 NOERROR T F F F 0 www-cmu-2.andrew.cmu.edu,128.2.10.163 5.000000,21600.000000 F +#close 2013-07-18-13-21-52 diff --git a/testing/btest/Baseline/scripts.base.protocols.dns.duplicate-reponses/weird.log b/testing/btest/Baseline/scripts.base.protocols.dns.duplicate-reponses/weird.log new file mode 100644 index 0000000000..c7de92f894 --- /dev/null +++ b/testing/btest/Baseline/scripts.base.protocols.dns.duplicate-reponses/weird.log @@ -0,0 +1,11 @@ +#separator \x09 +#set_separator , +#empty_field (empty) +#unset_field - +#path weird +#open 2013-07-18-13-21-52 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p name addl notice peer +#types time string addr port addr port string string bool string +1363716396.798286 UWkUyAuUGXf 55.247.223.174 27285 222.195.43.124 53 DNS_RR_unknown_type - F bro +1363716396.798374 UWkUyAuUGXf 55.247.223.174 27285 222.195.43.124 53 dns_unmatched_reply - F bro +#close 2013-07-18-13-21-52 diff --git a/testing/btest/Traces/dns-two-responses.trace b/testing/btest/Traces/dns-two-responses.trace new file mode 100644 index 0000000000000000000000000000000000000000..627b0d2ebe091fcec6ffbbcbe8c96019235d718b GIT binary patch literal 1006 zcmca|c+)~A1{MYw`2U}Qff2~jS?Lj2q07S%1Z0CSgWArB$9EYdN*v}ea4@(sFt{8u zVPG&6T-a~^{r7O<@oK8v!(kxxBobIXSnKIW?seWG>KTO#z@` 
z4L}UC!B3BeVHHpigdt`_>;c)pI7dvGfx$u0(Gz4k$QB0Etf`D0FD3*{X#v^5!T@wQ zh7AXJ7+66z1vW4UvWJI<>pJ^}vN`&>1i6ME5CBT*K_zE1u(C6Qlm*6YiVifDsFGHO zyEMB*YExpCz3~R!=||*l#~<*%XH>z~zW2P!4u#%zr2&tl#auLguaxB}dLF)<_q4?% z-vV8hr4whT+p@C7sj%&|o_X5qrt6X1P%e$Pdk&s{cH8jyj#RD`jWxfv=+v2VaV*~8gd{4Nrnq5V{}mVpeR^BC~ScG899J_ i8K`~%HTK^cz({}rmV^38YTr&-ffjSiMq_RW#vA~4#T8xv literal 0 HcmV?d00001 diff --git a/testing/btest/scripts/base/protocols/dns/duplicate-reponses.bro b/testing/btest/scripts/base/protocols/dns/duplicate-reponses.bro new file mode 100644 index 0000000000..a16235b9a5 --- /dev/null +++ b/testing/btest/scripts/base/protocols/dns/duplicate-reponses.bro @@ -0,0 +1,5 @@ +# This tests the case where the DNS server responded with zero RRs. +# +# @TEST-EXEC: bro -r $TRACES/dns-two-responses.trace +# @TEST-EXEC: btest-diff dns.log +# @TEST-EXEC: btest-diff weird.log \ No newline at end of file From 006e370ee04775c6196b52368c93897402115992 Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Thu, 18 Jul 2013 19:58:19 -0700 Subject: [PATCH 078/118] Canonyfying the output of core.print-bpf-filters. I couldn't figure out why it's not stable but it doesn't seem to matter for now unless more such situations show up. --- .../Baseline/core.print-bpf-filters/output | 28 ++++-------- .../Baseline/core.print-bpf-filters/output2 | 43 +++++++++++++++++++ testing/btest/core/print-bpf-filters.bro | 9 +++- 3 files changed, 59 insertions(+), 21 deletions(-) create mode 100644 testing/btest/Baseline/core.print-bpf-filters/output2 diff --git a/testing/btest/Baseline/core.print-bpf-filters/output b/testing/btest/Baseline/core.print-bpf-filters/output index 871719bba8..2f7a1d9386 100644 --- a/testing/btest/Baseline/core.print-bpf-filters/output +++ b/testing/btest/Baseline/core.print-bpf-filters/output @@ -3,38 +3,28 @@ #empty_field (empty) #unset_field - #path packet_filter -#open 2013-07-18-00-18-33 +#open 2013-07-19-02-54-13 #fields ts node filter init success #types time string string bool bool -1374106713.105591 - ip or not ip T T -#close 2013-07-18-00-18-33 +1374202453.158981 - ip or not ip T T +#close 2013-07-19-02-54-13 #separator \x09 #set_separator , #empty_field (empty) #unset_field - #path packet_filter -#open 2013-07-18-00-18-33 +#open 2013-07-19-02-54-13 #fields ts node filter init success #types time string string bool bool -1374106713.385541 - port 42 T T -#close 2013-07-18-00-18-33 +1374202453.437816 - port 42 T T +#close 2013-07-19-02-54-13 #separator \x09 #set_separator , #empty_field (empty) #unset_field - #path packet_filter -#open 2013-07-18-00-18-33 +#open 2013-07-19-02-54-13 #fields ts node filter init success #types time string string bool bool -1374106713.664282 - (vlan) and (ip or not ip) T T -#close 2013-07-18-00-18-33 -#separator \x09 -#set_separator , -#empty_field (empty) -#unset_field - -#path packet_filter -#open 2013-07-18-00-18-33 -#fields ts node filter init success -#types time string string bool bool -1374106713.957005 - ((((((((((((((((((((((tcp and port 5223) or (tcp and port 585)) or (tcp and port 614)) or (tcp and port 993)) or (tcp and port 636)) or (tcp and port 989)) or (tcp and port 995)) or (tcp and port 443)) or (tcp and port 563)) or (tcp and port 990)) or (tcp and port 992)) or ((tcp and port 2811) or (tcp and port 21))) or ((((tcp and port 6669) or (tcp and port 6666)) or (tcp and port 6668)) or (tcp and port 6667))) or ((udp and port 2152) or (udp and port 2123))) or (tcp and port 22)) or (tcp and port 1080)) or ((((((((tcp and port 631) or (tcp and port 8888)) or (tcp and port 3128)) 
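For context, what produces the unstable expression (illustration only, not part of the change): enabling the automatically generated per-protocol capture filters makes Bro OR them together into the single BPF string recorded in packet_filter.log, e.g.

    # Sketch of the option the test flips; assumed to sit in a test or
    # local script and to be redef-able as usual for PacketFilter options.
    # The resulting composite filter is what gets logged and canonified.
    redef PacketFilter::enable_auto_protocol_capture_filters = T;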
or (tcp and port 80)) or (tcp and port 1080)) or (tcp and port 8000)) or (tcp and port 81)) or (tcp and port 8080))) or (udp and port 5072)) or ((tcp and port 25) or (tcp and port 587))) or (tcp and port 502)) or (udp and port 514)) or (((((udp and port 5355) or (tcp and port 53)) or (udp and port 5353)) or (udp and port 137)) or (udp and port 53))) or (udp and port 3544) T T -#close 2013-07-18-00-18-33 +1374202453.715717 - (vlan) and (ip or not ip) T T +#close 2013-07-19-02-54-13 diff --git a/testing/btest/Baseline/core.print-bpf-filters/output2 b/testing/btest/Baseline/core.print-bpf-filters/output2 new file mode 100644 index 0000000000..460b02e055 --- /dev/null +++ b/testing/btest/Baseline/core.print-bpf-filters/output2 @@ -0,0 +1,43 @@ + 2 1080 + 1 137 + 1 21 + 1 2123 + 1 2152 + 1 22 + 1 25 + 1 2811 + 1 3128 + 1 3544 + 1 443 + 1 502 + 1 5072 + 1 514 + 1 5223 + 2 53 + 1 5353 + 1 5355 + 1 563 + 1 585 + 1 587 + 1 614 + 1 631 + 1 636 + 1 6666 + 1 6667 + 1 6668 + 1 6669 + 1 80 + 1 8000 + 1 8080 + 1 81 + 1 8888 + 1 989 + 1 990 + 1 992 + 1 993 + 1 995 + 40 and + 39 or + 40 port + 31 tcp + 9 udp diff --git a/testing/btest/core/print-bpf-filters.bro b/testing/btest/core/print-bpf-filters.bro index 2c3d761cca..410db14b5d 100644 --- a/testing/btest/core/print-bpf-filters.bro +++ b/testing/btest/core/print-bpf-filters.bro @@ -4,7 +4,12 @@ # @TEST-EXEC: cat packet_filter.log >>output # @TEST-EXEC: bro -r $TRACES/mixed-vlan-mpls.trace PacketFilter::restricted_filter="vlan" >>output # @TEST-EXEC: cat packet_filter.log >>output -# @TEST-EXEC: bro -r $TRACES/empty.trace PacketFilter::enable_auto_protocol_capture_filters=T >>output -# @TEST-EXEC: cat packet_filter.log >>output # @TEST-EXEC: btest-diff output # @TEST-EXEC: btest-diff conn.log +# +# The order in the output of enable_auto_protocol_capture_filters isn't +# stable, for reasons not clear. We canonify it first. +# @TEST-EXEC: bro -r $TRACES/empty.trace PacketFilter::enable_auto_protocol_capture_filters=T +# @TEST-EXEC: cat packet_filter.log | bro-cut filter | sed 's#[()]##g' | tr ' ' '\n' | sort | uniq -c >output2 +# @TEST-EXEC: btest-diff output2 + From d3495207453aa5f10edef51699606856e9829987 Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Thu, 18 Jul 2013 21:34:02 -0700 Subject: [PATCH 079/118] Another test fix. The classic "uniq -c" is not portable ... 
--- .../Baseline/core.print-bpf-filters/output2 | 86 +++++++++---------- testing/btest/core/print-bpf-filters.bro | 2 +- 2 files changed, 44 insertions(+), 44 deletions(-) diff --git a/testing/btest/Baseline/core.print-bpf-filters/output2 b/testing/btest/Baseline/core.print-bpf-filters/output2 index 460b02e055..99ad929fbf 100644 --- a/testing/btest/Baseline/core.print-bpf-filters/output2 +++ b/testing/btest/Baseline/core.print-bpf-filters/output2 @@ -1,43 +1,43 @@ - 2 1080 - 1 137 - 1 21 - 1 2123 - 1 2152 - 1 22 - 1 25 - 1 2811 - 1 3128 - 1 3544 - 1 443 - 1 502 - 1 5072 - 1 514 - 1 5223 - 2 53 - 1 5353 - 1 5355 - 1 563 - 1 585 - 1 587 - 1 614 - 1 631 - 1 636 - 1 6666 - 1 6667 - 1 6668 - 1 6669 - 1 80 - 1 8000 - 1 8080 - 1 81 - 1 8888 - 1 989 - 1 990 - 1 992 - 1 993 - 1 995 - 40 and - 39 or - 40 port - 31 tcp - 9 udp +2 1080 +1 137 +1 21 +1 2123 +1 2152 +1 22 +1 25 +1 2811 +1 3128 +1 3544 +1 443 +1 502 +1 5072 +1 514 +1 5223 +2 53 +1 5353 +1 5355 +1 563 +1 585 +1 587 +1 614 +1 631 +1 636 +1 6666 +1 6667 +1 6668 +1 6669 +1 80 +1 8000 +1 8080 +1 81 +1 8888 +1 989 +1 990 +1 992 +1 993 +1 995 +40 and +39 or +40 port +31 tcp +9 udp diff --git a/testing/btest/core/print-bpf-filters.bro b/testing/btest/core/print-bpf-filters.bro index 410db14b5d..6e4a4d5c30 100644 --- a/testing/btest/core/print-bpf-filters.bro +++ b/testing/btest/core/print-bpf-filters.bro @@ -10,6 +10,6 @@ # The order in the output of enable_auto_protocol_capture_filters isn't # stable, for reasons not clear. We canonify it first. # @TEST-EXEC: bro -r $TRACES/empty.trace PacketFilter::enable_auto_protocol_capture_filters=T -# @TEST-EXEC: cat packet_filter.log | bro-cut filter | sed 's#[()]##g' | tr ' ' '\n' | sort | uniq -c >output2 +# @TEST-EXEC: cat packet_filter.log | bro-cut filter | sed 's#[()]##g' | tr ' ' '\n' | sort | uniq -c | awk '{print $1, $2}' >output2 # @TEST-EXEC: btest-diff output2 From 9b444b2617c0a910a24ea938a3064eb092f26537 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Fri, 19 Jul 2013 13:16:12 -0400 Subject: [PATCH 080/118] Updates for the Intel Framework. - Intel importing format has changed (refer to docs). - All string matching is now case insensitive. - SMTP intel script has been updated to extract email addresses correctly. - Small fix sneaking into the smtp base script to actually extract individual email addresses in the To: field correctly. 
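For reference, a minimal sketch of the reworked script-level API (illustration only, assembled from the fields and enum values used in the diff below; the concrete indicator values are made up):

    # An item now carries a single indicator string plus an indicator_type,
    # and sightings are reported the same way; string matching is case
    # insensitive.
    @load base/frameworks/intel

    # Sites can extend the set of "where" locations for their own sightings.
    redef enum Intel::Where += { SOMEWHERE };

    event bro_init()
        {
        Intel::insert([$indicator="1.2.3.4",
                       $indicator_type=Intel::ADDR,
                       $meta=[$source="source1"]]);

        Intel::seen([$indicator="e@mail.com",
                     $indicator_type=Intel::EMAIL,
                     $where=SOMEWHERE]);
        }

Data files follow the same shift and now start with "#fields indicator indicator_type meta.source meta.desc meta.url", as shown in the updated doc/intel.rst below.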
--- doc/intel.rst | 20 +-- scripts/base/frameworks/intel/main.bro | 141 ++++++++---------- scripts/base/protocols/smtp/main.bro | 5 +- .../frameworks/intel/conn-established.bro | 8 +- scripts/policy/frameworks/intel/dns.bro | 4 +- .../frameworks/intel/http-host-header.bro | 4 +- scripts/policy/frameworks/intel/http-url.bro | 4 +- .../frameworks/intel/http-user-agents.bro | 4 +- .../frameworks/intel/smtp-url-extraction.bro | 4 +- scripts/policy/frameworks/intel/smtp.bro | 70 ++++++--- scripts/policy/frameworks/intel/ssl.bro | 12 +- .../manager-1.intel.log | 10 +- .../broproc.intel.log | 12 +- .../manager-1.intel.log | 16 +- .../frameworks/intel/cluster-transparency.bro | 8 +- .../base/frameworks/intel/input-and-match.bro | 12 +- .../intel/read-file-dist-cluster.bro | 10 +- 17 files changed, 178 insertions(+), 166 deletions(-) diff --git a/doc/intel.rst b/doc/intel.rst index 390313461a..2a59a98974 100644 --- a/doc/intel.rst +++ b/doc/intel.rst @@ -29,9 +29,6 @@ Framework to be checked by loading this script in local.bro:: @load policy/frameworks/intel -(TODO: find some good mechanism for getting setup with good data -quickly) - Refer to the "Loading Intelligence" section below to see the format for Intelligence Framework text files, then load those text files with this line in local.bro:: @@ -61,16 +58,14 @@ data out to all of the nodes that need it. Here is an example of the intelligence data format. Note that all whitespace separators are literal tabs and fields containing only a -hyphen a considered to be null values.:: +hyphen are considered to be null values.:: - #fields host net str str_type meta.source meta.desc meta.url - 1.2.3.4 - - - source1 Sending phishing email http://source1.com/badhosts/1.2.3.4 - - 31.131.248.0/21 - - spamhaus-drop SBL154982 - - - - - a.b.com Intel::DOMAIN source2 Name used for data exfiltration - + #fields indicator indicator_type meta.source meta.desc meta.url + 1.2.3.4 Intel::ADDR source1 Sending phishing email http://source1.com/badhosts/1.2.3.4 + a.b.com Intel::DOMAIN source2 Name used for data exfiltration - -For more examples of built in `str_type` values, please refer to the -autogenerated documentation for the intelligence framework (TODO: -figure out how to do this link). +For more examples of built in `indicator_type` values, please refer to the +autogenerated documentation for the intelligence framework. To load the data once files are created, use the following example code to define files to load with your own file names of course:: @@ -90,8 +85,7 @@ When some bit of data is extracted (such as an email address in the "From" header in a message over SMTP), the Intelligence Framework needs to be informed that this data was discovered and it's presence should be checked within the intelligence data set. This is -accomplished through the Intel::seen (TODO: do a reference link) -function. +accomplished through the Intel::seen function. Typically users won't need to work with this function due to built in hook scripts that Bro ships with that will "see" data and send it into diff --git a/scripts/base/frameworks/intel/main.bro b/scripts/base/frameworks/intel/main.bro index aeb7bf4bfc..1b740f538d 100644 --- a/scripts/base/frameworks/intel/main.bro +++ b/scripts/base/frameworks/intel/main.bro @@ -10,13 +10,14 @@ module Intel; export { redef enum Log::ID += { LOG }; - ## String data needs to be further categoried since it could represent - ## and number of types of data. - type StrType: enum { + ## Enum type to represent various types of intelligence data. 
+ type Type: enum { + ## An IP address. + ADDR, ## A complete URL without the prefix "http://". URL, - ## User-Agent string, typically HTTP or mail message body. - USER_AGENT, + ## Software name. + SOFTWARE, ## Email address. EMAIL, ## DNS domain name. @@ -44,18 +45,15 @@ export { ## Represents a piece of intelligence. type Item: record { - ## The IP address if the intelligence is about an IP address. - host: addr &optional; - ## The network if the intelligence is about a CIDR block. - net: subnet &optional; - ## The string if the intelligence is about a string. - str: string &optional; - ## The type of data that is in the string if the $str field is set. - str_type: StrType &optional; + ## The intelligence indicator. + indicator: string; + + ## The type of data that the indicator field represents. + indicator_type: Type; - ## Metadata for the item. Typically represents more deeply \ + ## Metadata for the item. Typically represents more deeply ## descriptive data for a piece of intelligence. - meta: MetaData; + meta: MetaData; }; ## Enum to represent where data came from when it was discovered. @@ -69,19 +67,22 @@ export { ## exclusive. These records *must* represent either an IP address being ## seen or a string being seen. type Seen: record { - ## The IP address if the data seen is an IP address. - host: addr &log &optional; ## The string if the data is about a string. - str: string &log &optional; - ## The type of data that is in the string if the $str field is set. - str_type: StrType &log &optional; + indicator: string &log &optional; + + ## The type of data that the indicator represents. + indicator_type: Type &log &optional; + + ## If the indicator type was :bro:enum:`Intel::ADDR`, then this + ## field will be present. + host: addr &optional; ## Where the data was discovered. - where: Where &log; + where: Where &log; ## If the data was discovered within a connection, the ## connection record should go into get to give context to the data. - conn: connection &optional; + conn: connection &optional; }; ## Record used for the logging framework representing a positive @@ -100,7 +101,7 @@ export { ## Where the data was seen. seen: Seen &log; ## Sources which supplied data that resulted in this match. - sources: set[string] &log; + sources: set[string] &log &default=string_set(); }; ## Intelligence data manipulation functions. @@ -135,8 +136,8 @@ const have_full_data = T &redef; # The in memory data structure for holding intelligence. type DataStore: record { - net_data: table[subnet] of set[MetaData]; - string_data: table[string, StrType] of set[MetaData]; + host_data: table[addr] of set[MetaData]; + string_data: table[string, Type] of set[MetaData]; }; global data_store: DataStore &redef; @@ -144,8 +145,8 @@ global data_store: DataStore &redef; # This is primarily for workers to do the initial quick matches and store # a minimal amount of data for the full match to happen on the manager. 
type MinDataStore: record { - net_data: set[subnet]; - string_data: set[string, StrType]; + host_data: set[addr]; + string_data: set[string, Type]; }; global min_data_store: MinDataStore &redef; @@ -157,15 +158,13 @@ event bro_init() &priority=5 function find(s: Seen): bool { - if ( s?$host && - ((have_full_data && s$host in data_store$net_data) || - (s$host in min_data_store$net_data))) + if ( s?$host ) { - return T; + return ((s$host in min_data_store$host_data) || + (have_full_data && s$host in data_store$host_data)); } - else if ( s?$str && s?$str_type && - ((have_full_data && [s$str, s$str_type] in data_store$string_data) || - ([s$str, s$str_type] in min_data_store$string_data))) + else if ( ([to_lower(s$indicator), s$indicator_type] in min_data_store$string_data) || + (have_full_data && [to_lower(s$indicator), s$indicator_type] in data_store$string_data) ) { return T; } @@ -177,8 +176,7 @@ function find(s: Seen): bool function get_items(s: Seen): set[Item] { - local item: Item; - local return_data: set[Item] = set(); + local return_data: set[Item]; if ( ! have_full_data ) { @@ -191,26 +189,23 @@ function get_items(s: Seen): set[Item] if ( s?$host ) { # See if the host is known about and it has meta values - if ( s$host in data_store$net_data ) + if ( s$host in data_store$host_data ) { - for ( m in data_store$net_data[s$host] ) + for ( m in data_store$host_data[s$host] ) { - # TODO: the lookup should be finding all and not just most specific - # and $host/$net should have the correct value. - item = [$host=s$host, $meta=m]; - add return_data[item]; + add return_data[Item($indicator=cat(s$host), $indicator_type=ADDR, $meta=m)]; } } } - else if ( s?$str && s?$str_type ) + else { + local lower_indicator = to_lower(s$indicator); # See if the string is known about and it has meta values - if ( [s$str, s$str_type] in data_store$string_data ) + if ( [lower_indicator, s$indicator_type] in data_store$string_data ) { - for ( m in data_store$string_data[s$str, s$str_type] ) + for ( m in data_store$string_data[lower_indicator, s$indicator_type] ) { - item = [$str=s$str, $str_type=s$str_type, $meta=m]; - add return_data[item]; + add return_data[Item($indicator=s$indicator, $indicator_type=s$indicator_type, $meta=m)]; } } } @@ -222,6 +217,12 @@ function Intel::seen(s: Seen) { if ( find(s) ) { + if ( s?$host ) + { + s$indicator = cat(s$host); + s$indicator_type = Intel::ADDR; + } + if ( have_full_data ) { local items = get_items(s); @@ -250,8 +251,7 @@ function has_meta(check: MetaData, metas: set[MetaData]): bool event Intel::match(s: Seen, items: set[Item]) &priority=5 { - local empty_set: set[string] = set(); - local info: Info = [$ts=network_time(), $seen=s, $sources=empty_set]; + local info: Info = [$ts=network_time(), $seen=s]; if ( s?$conn ) { @@ -267,52 +267,37 @@ event Intel::match(s: Seen, items: set[Item]) &priority=5 function insert(item: Item) { - if ( item?$str && !item?$str_type ) - { - event reporter_warning(network_time(), fmt("You must provide a str_type for strings or this item doesn't make sense. Item: %s", item), ""); - return; - } - # Create and fill out the meta data item. local meta = item$meta; local metas: set[MetaData]; - if ( item?$host ) + # All intelligence is case insensitive at the moment. + local lower_indicator = to_lower(item$indicator); + + if ( item$indicator_type == ADDR ) { - local host = mask_addr(item$host, is_v4_addr(item$host) ? 
32 : 128); + local host = to_addr(item$indicator); if ( have_full_data ) { - if ( host !in data_store$net_data ) - data_store$net_data[host] = set(); + if ( host !in data_store$host_data ) + data_store$host_data[host] = set(); - metas = data_store$net_data[host]; + metas = data_store$host_data[host]; } - add min_data_store$net_data[host]; + add min_data_store$host_data[host]; } - else if ( item?$net ) + else { if ( have_full_data ) { - if ( item$net !in data_store$net_data ) - data_store$net_data[item$net] = set(); + if ( [lower_indicator, item$indicator_type] !in data_store$string_data ) + data_store$string_data[lower_indicator, item$indicator_type] = set(); - metas = data_store$net_data[item$net]; + metas = data_store$string_data[lower_indicator, item$indicator_type]; } - add min_data_store$net_data[item$net]; - } - else if ( item?$str ) - { - if ( have_full_data ) - { - if ( [item$str, item$str_type] !in data_store$string_data ) - data_store$string_data[item$str, item$str_type] = set(); - - metas = data_store$string_data[item$str, item$str_type]; - } - - add min_data_store$string_data[item$str, item$str_type]; + add min_data_store$string_data[lower_indicator, item$indicator_type]; } local updated = F; diff --git a/scripts/base/protocols/smtp/main.bro b/scripts/base/protocols/smtp/main.bro index d53128b06c..0d510e645d 100644 --- a/scripts/base/protocols/smtp/main.bro +++ b/scripts/base/protocols/smtp/main.bro @@ -223,7 +223,10 @@ event mime_one_header(c: connection, h: mime_header_rec) &priority=5 { if ( ! c$smtp?$to ) c$smtp$to = set(); - add c$smtp$to[h$value]; + + local to_parts = split(h$value, /[[:blank:]]*,[[:blank:]]*/); + for ( i in to_parts ) + add c$smtp$to[to_parts[i]]; } else if ( h$name == "X-ORIGINATING-IP" ) diff --git a/scripts/policy/frameworks/intel/conn-established.bro b/scripts/policy/frameworks/intel/conn-established.bro index a2e67b292b..20cec43e04 100644 --- a/scripts/policy/frameworks/intel/conn-established.bro +++ b/scripts/policy/frameworks/intel/conn-established.bro @@ -3,6 +3,10 @@ event connection_established(c: connection) { - Intel::seen([$host=c$id$orig_h, $conn=c, $where=Conn::IN_ORIG]); - Intel::seen([$host=c$id$resp_h, $conn=c, $where=Conn::IN_RESP]); + if ( c$orig$state == TCP_ESTABLISHED && + c$resp$state == TCP_ESTABLISHED ) + { + Intel::seen([$host=c$id$orig_h, $conn=c, $where=Conn::IN_ORIG]); + Intel::seen([$host=c$id$resp_h, $conn=c, $where=Conn::IN_RESP]); + } } diff --git a/scripts/policy/frameworks/intel/dns.bro b/scripts/policy/frameworks/intel/dns.bro index a0dee47acf..9218586c95 100644 --- a/scripts/policy/frameworks/intel/dns.bro +++ b/scripts/policy/frameworks/intel/dns.bro @@ -3,8 +3,8 @@ event dns_request(c: connection, msg: dns_msg, query: string, qtype: count, qclass: count) { - Intel::seen([$str=query, - $str_type=Intel::DOMAIN, + Intel::seen([$indicator=query, + $indicator_type=Intel::DOMAIN, $conn=c, $where=DNS::IN_REQUEST]); } diff --git a/scripts/policy/frameworks/intel/http-host-header.bro b/scripts/policy/frameworks/intel/http-host-header.bro index f16b1628aa..3fd28b8ef9 100644 --- a/scripts/policy/frameworks/intel/http-host-header.bro +++ b/scripts/policy/frameworks/intel/http-host-header.bro @@ -4,8 +4,8 @@ event http_header(c: connection, is_orig: bool, name: string, value: string) { if ( is_orig && name == "HOST" ) - Intel::seen([$str=value, - $str_type=Intel::DOMAIN, + Intel::seen([$indicator=value, + $indicator_type=Intel::DOMAIN, $conn=c, $where=HTTP::IN_HOST_HEADER]); } diff --git 
a/scripts/policy/frameworks/intel/http-url.bro b/scripts/policy/frameworks/intel/http-url.bro index feef4f0dac..340ae3c5ab 100644 --- a/scripts/policy/frameworks/intel/http-url.bro +++ b/scripts/policy/frameworks/intel/http-url.bro @@ -5,8 +5,8 @@ event http_message_done(c: connection, is_orig: bool, stat: http_message_stat) { if ( is_orig && c?$http ) - Intel::seen([$str=HTTP::build_url(c$http), - $str_type=Intel::URL, + Intel::seen([$indicator=HTTP::build_url(c$http), + $indicator_type=Intel::URL, $conn=c, $where=HTTP::IN_URL]); } diff --git a/scripts/policy/frameworks/intel/http-user-agents.bro b/scripts/policy/frameworks/intel/http-user-agents.bro index 93445c1e43..7c4558d2a5 100644 --- a/scripts/policy/frameworks/intel/http-user-agents.bro +++ b/scripts/policy/frameworks/intel/http-user-agents.bro @@ -4,8 +4,8 @@ event http_header(c: connection, is_orig: bool, name: string, value: string) { if ( is_orig && name == "USER-AGENT" ) - Intel::seen([$str=value, - $str_type=Intel::USER_AGENT, + Intel::seen([$indicator=value, + $indicator_type=Intel::SOFTWARE, $conn=c, $where=HTTP::IN_USER_AGENT_HEADER]); } diff --git a/scripts/policy/frameworks/intel/smtp-url-extraction.bro b/scripts/policy/frameworks/intel/smtp-url-extraction.bro index 2b87f809a6..a3ba410641 100644 --- a/scripts/policy/frameworks/intel/smtp-url-extraction.bro +++ b/scripts/policy/frameworks/intel/smtp-url-extraction.bro @@ -13,8 +13,8 @@ event intel_mime_data(f: fa_file, data: string) local urls = find_all_urls_without_scheme(data); for ( url in urls ) { - Intel::seen([$str=url, - $str_type=Intel::URL, + Intel::seen([$indicator=url, + $indicator_type=Intel::URL, $conn=c, $where=SMTP::IN_MESSAGE]); } diff --git a/scripts/policy/frameworks/intel/smtp.bro b/scripts/policy/frameworks/intel/smtp.bro index 02e97ea54a..d760995e51 100644 --- a/scripts/policy/frameworks/intel/smtp.bro +++ b/scripts/policy/frameworks/intel/smtp.bro @@ -18,8 +18,8 @@ event mime_end_entity(c: connection) } if ( c$smtp?$user_agent ) - Intel::seen([$str=c$smtp$user_agent, - $str_type=Intel::USER_AGENT, + Intel::seen([$indicator=c$smtp$user_agent, + $indicator_type=Intel::SOFTWARE, $conn=c, $where=SMTP::IN_HEADER]); @@ -29,43 +29,69 @@ event mime_end_entity(c: connection) $where=SMTP::IN_X_ORIGINATING_IP_HEADER]); if ( c$smtp?$mailfrom ) - Intel::seen([$str=c$smtp$mailfrom, - $str_type=Intel::EMAIL, - $conn=c, - $where=SMTP::IN_MAIL_FROM]); + { + local mailfromparts = split_n(c$smtp$mailfrom, /<.+>/, T, 1); + if ( |mailfromparts| > 2 ) + { + Intel::seen([$indicator=mailfromparts[2][1:-2], + $indicator_type=Intel::EMAIL, + $conn=c, + $where=SMTP::IN_MAIL_FROM]); + } + } if ( c$smtp?$rcptto ) { for ( rcptto in c$smtp$rcptto ) { - Intel::seen([$str=rcptto, - $str_type=Intel::EMAIL, - $conn=c, - $where=SMTP::IN_RCPT_TO]); + local rcpttoparts = split_n(rcptto, /<.+>/, T, 1); + if ( |rcpttoparts| > 2 ) + { + Intel::seen([$indicator=rcpttoparts[2][1:-2], + $indicator_type=Intel::EMAIL, + $conn=c, + $where=SMTP::IN_RCPT_TO]); + } } } if ( c$smtp?$from ) - Intel::seen([$str=c$smtp$from, - $str_type=Intel::EMAIL, - $conn=c, - $where=SMTP::IN_FROM]); + { + local fromparts = split_n(c$smtp$from, /<.+>/, T, 1); + if ( |fromparts| > 2 ) + { + Intel::seen([$indicator=fromparts[2][1:-2], + $indicator_type=Intel::EMAIL, + $conn=c, + $where=SMTP::IN_FROM]); + } + } if ( c$smtp?$to ) { for ( email_to in c$smtp$to ) { - Intel::seen([$str=email_to, - $str_type=Intel::EMAIL, - $conn=c, - $where=SMTP::IN_TO]); + local toparts = split_n(email_to, /<.+>/, T, 1); + if ( |toparts| 
> 2 ) + { + Intel::seen([$indicator=toparts[2][1:-2], + $indicator_type=Intel::EMAIL, + $conn=c, + $where=SMTP::IN_TO]); + } } } if ( c$smtp?$reply_to ) - Intel::seen([$str=c$smtp$reply_to, - $str_type=Intel::EMAIL, - $conn=c, - $where=SMTP::IN_REPLY_TO]); + { + local replytoparts = split_n(c$smtp$reply_to, /<.+>/, T, 1); + if ( |replytoparts| > 2 ) + { + Intel::seen([$indicator=replytoparts[2][1:-2], + $indicator_type=Intel::EMAIL, + $conn=c, + $where=SMTP::IN_REPLY_TO]); + } + } } } diff --git a/scripts/policy/frameworks/intel/ssl.bro b/scripts/policy/frameworks/intel/ssl.bro index 3f18a11e6e..e404c39e5b 100644 --- a/scripts/policy/frameworks/intel/ssl.bro +++ b/scripts/policy/frameworks/intel/ssl.bro @@ -10,14 +10,14 @@ event x509_certificate(c: connection, is_orig: bool, cert: X509, chain_idx: coun { local email = sub(cert$subject, /^.*emailAddress=/, ""); email = sub(email, /,.*$/, ""); - Intel::seen([$str=email, - $str_type=Intel::EMAIL, + Intel::seen([$indicator=email, + $indicator_type=Intel::EMAIL, $conn=c, $where=(is_orig ? SSL::IN_CLIENT_CERT : SSL::IN_SERVER_CERT)]); } - Intel::seen([$str=sha1_hash(der_cert), - $str_type=Intel::CERT_HASH, + Intel::seen([$indicator=sha1_hash(der_cert), + $indicator_type=Intel::CERT_HASH, $conn=c, $where=(is_orig ? SSL::IN_CLIENT_CERT : SSL::IN_SERVER_CERT)]); } @@ -27,8 +27,8 @@ event ssl_extension(c: connection, is_orig: bool, code: count, val: string) { if ( is_orig && SSL::extensions[code] == "server_name" && c?$ssl && c$ssl?$server_name ) - Intel::seen([$str=c$ssl$server_name, - $str_type=Intel::DOMAIN, + Intel::seen([$indicator=c$ssl$server_name, + $indicator_type=Intel::DOMAIN, $conn=c, $where=SSL::IN_SERVER_NAME]); } diff --git a/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/manager-1.intel.log b/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/manager-1.intel.log index 26efc039c4..00871e7d93 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/manager-1.intel.log +++ b/testing/btest/Baseline/scripts.base.frameworks.intel.cluster-transparency/manager-1.intel.log @@ -3,8 +3,8 @@ #empty_field (empty) #unset_field - #path intel -#open 2012-10-03-20-20-39 -#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p seen.host seen.str seen.str_type seen.where sources -#types time string addr port addr port addr string enum enum table[string] -1349295639.424940 - - - - - 123.123.123.123 - - Intel::IN_ANYWHERE worker-1 -#close 2012-10-03-20-20-49 +#open 2013-07-19-17-05-48 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p seen.indicator seen.indicator_type seen.where sources +#types time string addr port addr port string enum enum table[string] +1374253548.038580 - - - - - 123.123.123.123 Intel::ADDR Intel::IN_ANYWHERE worker-1 +#close 2013-07-19-17-05-57 diff --git a/testing/btest/Baseline/scripts.base.frameworks.intel.input-and-match/broproc.intel.log b/testing/btest/Baseline/scripts.base.frameworks.intel.input-and-match/broproc.intel.log index d72e9efed3..8c01ae5c27 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.intel.input-and-match/broproc.intel.log +++ b/testing/btest/Baseline/scripts.base.frameworks.intel.input-and-match/broproc.intel.log @@ -3,9 +3,9 @@ #empty_field (empty) #unset_field - #path intel -#open 2012-10-03-20-18-05 -#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p seen.host seen.str seen.str_type seen.where sources -#types time string addr port addr port addr string enum enum table[string] -1349295485.114156 - - - - - - 
e@mail.com Intel::EMAIL SOMEWHERE source1 -1349295485.114156 - - - - - 1.2.3.4 - - SOMEWHERE source1 -#close 2012-10-03-20-18-05 +#open 2013-07-19-17-04-26 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p seen.indicator seen.indicator_type seen.where sources +#types time string addr port addr port string enum enum table[string] +1374253466.857185 - - - - - e@mail.com Intel::EMAIL SOMEWHERE source1 +1374253466.857185 - - - - - 1.2.3.4 Intel::ADDR SOMEWHERE source1 +#close 2013-07-19-17-04-26 diff --git a/testing/btest/Baseline/scripts.base.frameworks.intel.read-file-dist-cluster/manager-1.intel.log b/testing/btest/Baseline/scripts.base.frameworks.intel.read-file-dist-cluster/manager-1.intel.log index 8069bad528..70d92a3604 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.intel.read-file-dist-cluster/manager-1.intel.log +++ b/testing/btest/Baseline/scripts.base.frameworks.intel.read-file-dist-cluster/manager-1.intel.log @@ -3,11 +3,11 @@ #empty_field (empty) #unset_field - #path intel -#open 2012-10-10-15-05-23 -#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p seen.host seen.str seen.str_type seen.where sources -#types time string addr port addr port addr string enum enum table[string] -1349881523.548946 - - - - - 1.2.3.4 - - Intel::IN_A_TEST source1 -1349881523.548946 - - - - - - e@mail.com Intel::EMAIL Intel::IN_A_TEST source1 -1349881524.567896 - - - - - 1.2.3.4 - - Intel::IN_A_TEST source1 -1349881524.567896 - - - - - - e@mail.com Intel::EMAIL Intel::IN_A_TEST source1 -#close 2012-10-10-15-05-24 +#open 2013-07-19-17-06-57 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p seen.indicator seen.indicator_type seen.where sources +#types time string addr port addr port string enum enum table[string] +1374253617.312158 - - - - - 1.2.3.4 Intel::ADDR Intel::IN_A_TEST source1 +1374253617.312158 - - - - - e@mail.com Intel::EMAIL Intel::IN_A_TEST source1 +1374253618.332565 - - - - - 1.2.3.4 Intel::ADDR Intel::IN_A_TEST source1 +1374253618.332565 - - - - - e@mail.com Intel::EMAIL Intel::IN_A_TEST source1 +#close 2013-07-19-17-07-06 diff --git a/testing/btest/scripts/base/frameworks/intel/cluster-transparency.bro b/testing/btest/scripts/base/frameworks/intel/cluster-transparency.bro index 3810de5d4b..4d977d475d 100644 --- a/testing/btest/scripts/base/frameworks/intel/cluster-transparency.bro +++ b/testing/btest/scripts/base/frameworks/intel/cluster-transparency.bro @@ -28,7 +28,7 @@ event remote_connection_handshake_done(p: event_peer) # Insert the data once both workers are connected. if ( Cluster::local_node_type() == Cluster::MANAGER && Cluster::worker_count == 2 ) { - Intel::insert([$host=1.2.3.4,$meta=[$source="manager"]]); + Intel::insert([$indicator="1.2.3.4", $indicator_type=Intel::ADDR, $meta=[$source="manager"]]); } } @@ -39,7 +39,7 @@ event Intel::cluster_new_item(item: Intel::Item) if ( ! is_remote_event() ) return; - print fmt("cluster_new_item: %s inserted by %s (from peer: %s)", item$host, item$meta$source, get_event_peer()$descr); + print fmt("cluster_new_item: %s inserted by %s (from peer: %s)", item$indicator, item$meta$source, get_event_peer()$descr); if ( ! sent_data ) { @@ -47,9 +47,9 @@ event Intel::cluster_new_item(item: Intel::Item) # full cluster is constructed. 
sent_data = T; if ( Cluster::node == "worker-1" ) - Intel::insert([$host=123.123.123.123,$meta=[$source="worker-1"]]); + Intel::insert([$indicator="123.123.123.123", $indicator_type=Intel::ADDR, $meta=[$source="worker-1"]]); if ( Cluster::node == "worker-2" ) - Intel::insert([$host=4.3.2.1,$meta=[$source="worker-2"]]); + Intel::insert([$indicator="4.3.2.1", $indicator_type=Intel::ADDR, $meta=[$source="worker-2"]]); } # We're forcing worker-2 to do a lookup when it has three intelligence items diff --git a/testing/btest/scripts/base/frameworks/intel/input-and-match.bro b/testing/btest/scripts/base/frameworks/intel/input-and-match.bro index f77f5c0f1d..7150d30993 100644 --- a/testing/btest/scripts/base/frameworks/intel/input-and-match.bro +++ b/testing/btest/scripts/base/frameworks/intel/input-and-match.bro @@ -5,10 +5,10 @@ # @TEST-EXEC: btest-diff broproc/intel.log @TEST-START-FILE intel.dat -#fields host net str str_type meta.source meta.desc meta.url -1.2.3.4 - - - source1 this host is just plain baaad http://some-data-distributor.com/1234 -1.2.3.4 - - - source1 this host is just plain baaad http://some-data-distributor.com/1234 -- - e@mail.com Intel::EMAIL source1 Phishing email source http://some-data-distributor.com/100000 +#fields indicator indicator_type meta.source meta.desc meta.url +1.2.3.4 Intel::ADDR source1 this host is just plain baaad http://some-data-distributor.com/1234 +1.2.3.4 Intel::ADDR source1 this host is just plain baaad http://some-data-distributor.com/1234 +e@mail.com Intel::EMAIL source1 Phishing email source http://some-data-distributor.com/100000 @TEST-END-FILE @load frameworks/communication/listen @@ -18,8 +18,8 @@ redef enum Intel::Where += { SOMEWHERE }; event do_it() { - Intel::seen([$str="e@mail.com", - $str_type=Intel::EMAIL, + Intel::seen([$indicator="e@mail.com", + $indicator_type=Intel::EMAIL, $where=SOMEWHERE]); Intel::seen([$host=1.2.3.4, diff --git a/testing/btest/scripts/base/frameworks/intel/read-file-dist-cluster.bro b/testing/btest/scripts/base/frameworks/intel/read-file-dist-cluster.bro index 6838736249..f336fe24b3 100644 --- a/testing/btest/scripts/base/frameworks/intel/read-file-dist-cluster.bro +++ b/testing/btest/scripts/base/frameworks/intel/read-file-dist-cluster.bro @@ -19,10 +19,10 @@ redef Cluster::nodes = { @TEST-END-FILE @TEST-START-FILE intel.dat -#fields host net str str_type meta.source meta.desc meta.url -1.2.3.4 - - - source1 this host is just plain baaad http://some-data-distributor.com/1234 -1.2.3.4 - - - source1 this host is just plain baaad http://some-data-distributor.com/1234 -- - e@mail.com Intel::EMAIL source1 Phishing email source http://some-data-distributor.com/100000 +#fields indicator indicator_type meta.source meta.desc meta.url +1.2.3.4 Intel::ADDR source1 this host is just plain baaad http://some-data-distributor.com/1234 +1.2.3.4 Intel::ADDR source1 this host is just plain baaad http://some-data-distributor.com/1234 +e@mail.com Intel::EMAIL source1 Phishing email source http://some-data-distributor.com/100000 @TEST-END-FILE @load base/frameworks/control @@ -41,7 +41,7 @@ redef enum Intel::Where += { event do_it() { Intel::seen([$host=1.2.3.4, $where=Intel::IN_A_TEST]); - Intel::seen([$str="e@mail.com", $str_type=Intel::EMAIL, $where=Intel::IN_A_TEST]); + Intel::seen([$indicator="e@mail.com", $indicator_type=Intel::EMAIL, $where=Intel::IN_A_TEST]); } event bro_init() From 9dae9dd3e26627d50c3a3620205eee3db88b2e4b Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Fri, 19 Jul 2013 13:53:15 -0400 Subject: [PATCH 
081/118] Remove the intel insertion after heuristically detecting ssh bruteforcing. --- scripts/policy/protocols/ssh/detect-bruteforcing.bro | 4 ---- 1 file changed, 4 deletions(-) diff --git a/scripts/policy/protocols/ssh/detect-bruteforcing.bro b/scripts/policy/protocols/ssh/detect-bruteforcing.bro index 309905e939..ada418e61f 100644 --- a/scripts/policy/protocols/ssh/detect-bruteforcing.bro +++ b/scripts/policy/protocols/ssh/detect-bruteforcing.bro @@ -58,10 +58,6 @@ event bro_init() $msg=fmt("%s appears to be guessing SSH passwords (seen in %d connections).", key$host, r$num), $src=key$host, $identifier=cat(key$host)]); - # Insert the guesser into the intel framework. - Intel::insert([$host=key$host, - $meta=[$source="local", - $desc=fmt("Bro observed %d apparently failed SSH connections.", r$num)]]); }]); } From fd2e155d1af26086d40e12d38f564b7954f4597e Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Sun, 21 Jul 2013 17:34:25 +0200 Subject: [PATCH 082/118] Tweak hasher interface. --- src/BloomFilter.cc | 34 +++++++------- src/BloomFilter.h | 31 +++++++------ src/CMakeLists.txt | 2 +- src/HashPolicy.cc | 77 -------------------------------- src/HashPolicy.h | 97 ---------------------------------------- src/Hasher.cc | 79 ++++++++++++++++++++++++++++++++ src/Hasher.h | 109 +++++++++++++++++++++++++++++++++++++++++++++ src/bro.bif | 8 ++-- 8 files changed, 225 insertions(+), 212 deletions(-) delete mode 100644 src/HashPolicy.cc delete mode 100644 src/HashPolicy.h create mode 100644 src/Hasher.cc create mode 100644 src/Hasher.h diff --git a/src/BloomFilter.cc b/src/BloomFilter.cc index c59092b1e4..f399bddeca 100644 --- a/src/BloomFilter.cc +++ b/src/BloomFilter.cc @@ -6,19 +6,19 @@ #include "Serializer.h" BloomFilter::BloomFilter() - : hash_(NULL) + : hasher_(NULL) { } -BloomFilter::BloomFilter(const HashPolicy* hash_policy) - : hash_(hash_policy) +BloomFilter::BloomFilter(const Hasher* hasher) + : hasher_(hasher) { } BloomFilter::~BloomFilter() { - if ( hash_ ) - delete hash_; + if ( hasher_ ) + delete hasher_; } bool BloomFilter::Serialize(SerialInfo* info) const @@ -35,9 +35,9 @@ BloomFilter* BloomFilter::Unserialize(UnserialInfo* info) bool BloomFilter::DoSerialize(SerialInfo* info) const { DO_SERIALIZE(SER_BLOOMFILTER, SerialObj); - if ( ! SERIALIZE(static_cast(hash_->K())) ) + if ( ! SERIALIZE(static_cast(hasher_->K())) ) return false; - return SERIALIZE_STR(hash_->Name().c_str(), hash_->Name().size()); + return SERIALIZE_STR(hasher_->Name().c_str(), hasher_->Name().size()); } bool BloomFilter::DoUnserialize(UnserialInfo* info) @@ -49,7 +49,7 @@ bool BloomFilter::DoUnserialize(UnserialInfo* info) const char* name; if ( ! UNSERIALIZE_STR(&name, 0) ) return false; - hash_ = HashPolicy::Create(k, name); + hasher_ = Hasher::Create(k, name); delete [] name; return true; } @@ -70,7 +70,7 @@ size_t BasicBloomFilter::K(size_t cells, size_t capacity) BasicBloomFilter* BasicBloomFilter::Merge(const BasicBloomFilter* x, const BasicBloomFilter* y) { - // TODO: Ensure that x and y use the same HashPolicy before proceeding. + // TODO: Ensure that x and y use the same Hasher before proceeding. 
BasicBloomFilter* result = new BasicBloomFilter(); result->bits_ = new BitVector(*x->bits_ | *y->bits_); return result; @@ -81,8 +81,8 @@ BasicBloomFilter::BasicBloomFilter() { } -BasicBloomFilter::BasicBloomFilter(const HashPolicy* hash_policy, size_t cells) - : BloomFilter(hash_policy), +BasicBloomFilter::BasicBloomFilter(const Hasher* hasher, size_t cells) + : BloomFilter(hasher), bits_(new BitVector(cells)) { } @@ -102,13 +102,13 @@ bool BasicBloomFilter::DoUnserialize(UnserialInfo* info) return bits_ != NULL; } -void BasicBloomFilter::AddImpl(const HashPolicy::hash_vector& h) +void BasicBloomFilter::AddImpl(const Hasher::digest_vector& h) { for ( size_t i = 0; i < h.size(); ++i ) bits_->Set(h[i] % bits_->Size()); } -size_t BasicBloomFilter::CountImpl(const HashPolicy::hash_vector& h) const +size_t BasicBloomFilter::CountImpl(const Hasher::digest_vector& h) const { for ( size_t i = 0; i < h.size(); ++i ) if ( ! (*bits_)[h[i] % bits_->Size()] ) @@ -129,9 +129,9 @@ CountingBloomFilter::CountingBloomFilter() { } -CountingBloomFilter::CountingBloomFilter(const HashPolicy* hash_policy, +CountingBloomFilter::CountingBloomFilter(const Hasher* hasher, size_t cells, size_t width) - : BloomFilter(hash_policy) + : BloomFilter(hasher) { cells_ = new CounterVector(width, cells); } @@ -152,13 +152,13 @@ bool CountingBloomFilter::DoUnserialize(UnserialInfo* info) return cells_ != NULL; } -void CountingBloomFilter::AddImpl(const HashPolicy::hash_vector& h) +void CountingBloomFilter::AddImpl(const Hasher::digest_vector& h) { for ( size_t i = 0; i < h.size(); ++i ) cells_->Increment(h[i] % cells_->Size(), 1); } -size_t CountingBloomFilter::CountImpl(const HashPolicy::hash_vector& h) const +size_t CountingBloomFilter::CountImpl(const Hasher::digest_vector& h) const { CounterVector::size_type min = std::numeric_limits::max(); diff --git a/src/BloomFilter.h b/src/BloomFilter.h index 189f4920b7..92f15c6070 100644 --- a/src/BloomFilter.h +++ b/src/BloomFilter.h @@ -3,7 +3,7 @@ #include #include "BitVector.h" -#include "HashPolicy.h" +#include "Hasher.h" class CounterVector; @@ -12,7 +12,7 @@ class CounterVector; */ class BloomFilter : public SerialObj { public: - // At this point we won't let the user choose the hash policy, but we might + // At this point we won't let the user choose the hasher, but we might // open up the interface in the future. virtual ~BloomFilter(); @@ -23,7 +23,7 @@ public: template void Add(const T& x) { - AddImpl(hash_->Hash(&x, sizeof(x))); + AddImpl((*hasher_)(x)); } /** @@ -36,7 +36,7 @@ public: template size_t Count(const T& x) const { - return CountImpl(hash_->Hash(&x, sizeof(x))); + return CountImpl((*hasher_)(x)); } bool Serialize(SerialInfo* info) const; @@ -50,15 +50,15 @@ protected: /** * Constructs a Bloom filter. * - * @param hash_policy The hash policy to use for this Bloom filter. + * @param hasher The hasher to use for this Bloom filter. */ - BloomFilter(const HashPolicy* hash_policy); + BloomFilter(const Hasher* hasher); - virtual void AddImpl(const HashPolicy::hash_vector& hashes) = 0; - virtual size_t CountImpl(const HashPolicy::hash_vector& hashes) const = 0; + virtual void AddImpl(const Hasher::digest_vector& hashes) = 0; + virtual size_t CountImpl(const Hasher::digest_vector& hashes) const = 0; private: - const HashPolicy* hash_; + const Hasher* hasher_; }; /** @@ -98,15 +98,15 @@ public: /** * Constructs a basic Bloom filter with a given number of cells and capacity. 
*/ - BasicBloomFilter(const HashPolicy* hash_policy, size_t cells); + BasicBloomFilter(const Hasher* hasher, size_t cells); protected: DECLARE_SERIAL(BasicBloomFilter); BasicBloomFilter(); - virtual void AddImpl(const HashPolicy::hash_vector& h); - virtual size_t CountImpl(const HashPolicy::hash_vector& h) const; + virtual void AddImpl(const Hasher::digest_vector& h); + virtual size_t CountImpl(const Hasher::digest_vector& h) const; private: BitVector* bits_; @@ -120,16 +120,15 @@ public: static CountingBloomFilter* Merge(const CountingBloomFilter* x, const CountingBloomFilter* y); - CountingBloomFilter(const HashPolicy* hash_policy, size_t cells, - size_t width); + CountingBloomFilter(const Hasher* hasher, size_t cells, size_t width); protected: DECLARE_SERIAL(CountingBloomFilter); CountingBloomFilter(); - virtual void AddImpl(const HashPolicy::hash_vector& h); - virtual size_t CountImpl(const HashPolicy::hash_vector& h) const; + virtual void AddImpl(const Hasher::digest_vector& h); + virtual size_t CountImpl(const Hasher::digest_vector& h) const; private: CounterVector* cells_; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f2c7ce6bad..87a3db3b62 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -279,7 +279,7 @@ set(bro_SRCS Frame.cc Func.cc Hash.cc - HashPolicy.cc + Hasher.cc ID.cc IntSet.cc IOSource.cc diff --git a/src/HashPolicy.cc b/src/HashPolicy.cc deleted file mode 100644 index 7ce754be3c..0000000000 --- a/src/HashPolicy.cc +++ /dev/null @@ -1,77 +0,0 @@ -#include "HashPolicy.h" - -#include "digest.h" - -Hasher::Hasher(size_t seed, const std::string& extra) - : h_(compute_seed(seed, extra)) - { - } - -Hasher::hash_type Hasher::operator()(const void* x, size_t n) const - { - return n == 0 ? 0 : h_(x, n); - } - -size_t Hasher::compute_seed(size_t seed, const std::string& extra) - { - u_char digest[SHA256_DIGEST_LENGTH]; - SHA256_CTX ctx; - sha256_init(&ctx); - if ( extra.empty() ) - { - unsigned int first_seed = initial_seed(); - sha256_update(&ctx, &first_seed, sizeof(first_seed)); - } - else - { - sha256_update(&ctx, extra.c_str(), extra.size()); - } - sha256_update(&ctx, &seed, sizeof(seed)); - sha256_final(&ctx, digest); - return *reinterpret_cast(digest); - } - - -HashPolicy* HashPolicy::Create(size_t k, const std::string& name) - { - return new DefaultHashing(k, name); - } - -HashPolicy::HashPolicy(size_t k, const std::string& name) - : k_(k), name_(name) - { - } - -DefaultHashing::DefaultHashing(size_t k, const std::string& name) - : HashPolicy(k, name) - { - for ( size_t i = 0; i < k; ++i ) - hashers_.push_back(Hasher(i, name)); - } - -HashPolicy::hash_vector DefaultHashing::Hash(const void* x, size_t n) const - { - hash_vector h(K(), 0); - for ( size_t i = 0; i < h.size(); ++i ) - h[i] = hashers_[i](x, n); - return h; - } - -DoubleHashing::DoubleHashing(size_t k, const std::string& name) - : HashPolicy(k, name), - hasher1_(1, name), - hasher2_(2, name) - { - } - -HashPolicy::hash_vector DoubleHashing::Hash(const void* x, size_t n) const - { - hash_type h1 = hasher1_(x, n); - hash_type h2 = hasher2_(x, n); - hash_vector h(K(), 0); - for ( size_t i = 0; i < h.size(); ++i ) - h[i] = h1 + i * h2; - return h; - } - - diff --git a/src/HashPolicy.h b/src/HashPolicy.h deleted file mode 100644 index 7bdb968bfe..0000000000 --- a/src/HashPolicy.h +++ /dev/null @@ -1,97 +0,0 @@ -#ifndef HashPolicy_h -#define HashPolicy_h - -#include "Hash.h" -#include "H3.h" - -/** - * A functor that computes a universal hash function. 
- */ -class Hasher { -public: - typedef hash_t hash_type; - - /** - * Constructs a hasher seeded by a given seed and optionally an extra - * descriptor. - * - * @param seed The seed to use. - * - * @param extra If not `NULL`, the hasher will not mix in the initial seed - * but instead use this NUL-terminated string as additional seed. - */ - Hasher(size_t seed, const std::string& extra = ""); - - /** - * Computes the hash digest of contiguous data. - * - * @param x A pointer to the beginning of the byte sequence to hash. - * - * @param n The length of the sequence pointed to by *x*. - */ - hash_type operator()(const void* x, size_t n) const; - -private: - static size_t compute_seed(size_t seed, const std::string& extra); - - H3 h_; -}; - -/** - * The abstract base class for hash policies that hash elements *k* times. - */ -class HashPolicy { -public: - /** - * Constructs the hashing policy used by the implementation. This factory - * function exists because the HashingPolicy class hierachy is not yet - * serializable. - */ - static HashPolicy* Create(size_t k, const std::string& name); - - typedef Hasher::hash_type hash_type; - typedef std::vector hash_vector; - - virtual ~HashPolicy() { } - - virtual hash_vector Hash(const void* x, size_t n) const = 0; - - size_t K() const { return k_; } - const std::string& Name() const { return name_; } - -protected: - HashPolicy(size_t k, const std::string& name); - -private: - const size_t k_; - std::string name_; -}; - -/** - * The default hashing policy. Performs *k* hash function computations. - */ -class DefaultHashing : public HashPolicy { -public: - DefaultHashing(size_t k, const std::string& name); - - virtual hash_vector Hash(const void* x, size_t n) const /* override */; - -private: - std::vector hashers_; -}; - -/** - * The *double-hashing* policy. Uses a linear combination of two hash functions. - */ -class DoubleHashing : public HashPolicy { -public: - DoubleHashing(size_t k, const std::string& name); - - virtual hash_vector Hash(const void* x, size_t n) const; - -private: - Hasher hasher1_; - Hasher hasher2_; -}; - -#endif diff --git a/src/Hasher.cc b/src/Hasher.cc new file mode 100644 index 0000000000..045adcd174 --- /dev/null +++ b/src/Hasher.cc @@ -0,0 +1,79 @@ +#include "Hasher.h" + +#include "digest.h" + +Hasher::UHF::UHF(size_t seed, const std::string& extra) + : h_(compute_seed(seed, extra)) + { + } + +Hasher::digest Hasher::UHF::hash(const void* x, size_t n) const + { + assert(n <= UHASH_KEY_SIZE); + return n == 0 ? 0 : h_(x, n); + } + +size_t Hasher::UHF::compute_seed(size_t seed, const std::string& extra) + { + u_char buf[SHA256_DIGEST_LENGTH]; + SHA256_CTX ctx; + sha256_init(&ctx); + if ( extra.empty() ) + { + unsigned int first_seed = initial_seed(); + sha256_update(&ctx, &first_seed, sizeof(first_seed)); + } + else + { + sha256_update(&ctx, extra.c_str(), extra.size()); + } + sha256_update(&ctx, &seed, sizeof(seed)); + sha256_final(&ctx, buf); + // Take the first sizeof(size_t) bytes as seed. 
+ return *reinterpret_cast(buf); + } + + +Hasher* Hasher::Create(size_t k, const std::string& name) + { + return new DefaultHasher(k, name); + } + +Hasher::Hasher(size_t k, const std::string& name) + : k_(k), name_(name) + { + } + +DefaultHasher::DefaultHasher(size_t k, const std::string& name) + : Hasher(k, name) + { + for ( size_t i = 0; i < k; ++i ) + hash_functions_.push_back(UHF(i, name)); + } + +Hasher::digest_vector DefaultHasher::Hash(const void* x, size_t n) const + { + digest_vector h(K(), 0); + for ( size_t i = 0; i < h.size(); ++i ) + h[i] = hash_functions_[i](x, n); + return h; + } + +DoubleHasher::DoubleHasher(size_t k, const std::string& name) + : Hasher(k, name), + h1_(1, name), + h2_(2, name) + { + } + +Hasher::digest_vector DoubleHasher::Hash(const void* x, size_t n) const + { + digest h1 = h1_(x, n); + digest h2 = h2_(x, n); + digest_vector h(K(), 0); + for ( size_t i = 0; i < h.size(); ++i ) + h[i] = h1 + i * h2; + return h; + } + + diff --git a/src/Hasher.h b/src/Hasher.h new file mode 100644 index 0000000000..8d0af6b03f --- /dev/null +++ b/src/Hasher.h @@ -0,0 +1,109 @@ +#ifndef Hasher_h +#define Hasher_h + +#include "Hash.h" +#include "H3.h" + +/** + * The abstract base class for hashers, i.e., constructs which hash elements + * *k* times. + */ +class Hasher { +public: + typedef hash_t digest; + typedef std::vector digest_vector; + + /** + * Constructs the hashing policy used by the implementation. + * + * @todo This factory function exists because the HashingPolicy class + * hierachy is not yet serializable. + */ + static Hasher* Create(size_t k, const std::string& name); + + virtual ~Hasher() { } + + template + digest_vector operator()(const T& x) const + { + return Hash(&x, sizeof(T)); + } + + virtual digest_vector Hash(const void* x, size_t n) const = 0; + + size_t K() const { return k_; } + const std::string& Name() const { return name_; } + +protected: + /** + * A universal hash function family. + */ + class UHF { + public: + /** + * Constructs an H3 hash function seeded with a given seed and an optional + * extra seed to replace the initial Bro seed. + * + * @param seed The seed to use for this instance. + * + * @param extra If not empty, this parameter replaces the initial seed to + * compute the seed for t to compute the + * seed + * NUL-terminated string as additional seed. + */ + UHF(size_t seed, const std::string& extra = ""); + + template + digest operator()(const T& x) const + { + return hash(&x, sizeof(T)); + } + + digest operator()(const void* x, size_t n) const + { + return hash(x, n); + } + + digest hash(const void* x, size_t n) const; + + private: + static size_t compute_seed(size_t seed, const std::string& extra); + + H3 h_; + }; + + Hasher(size_t k, const std::string& name); + +private: + const size_t k_; + std::string name_; +}; + +/** + * The default hashing policy. Performs *k* hash function computations. + */ +class DefaultHasher : public Hasher { +public: + DefaultHasher(size_t k, const std::string& name); + + virtual digest_vector Hash(const void* x, size_t n) const /* final */; + +private: + std::vector hash_functions_; +}; + +/** + * The *double-hashing* policy. Uses a linear combination of two hash functions. 
+ */ +class DoubleHasher : public Hasher { +public: + DoubleHasher(size_t k, const std::string& name); + + virtual digest_vector Hash(const void* x, size_t n) const /* final */; + +private: + UHF h1_; + UHF h2_; +}; + +#endif diff --git a/src/bro.bif b/src/bro.bif index d0ce066139..71f8c0716f 100644 --- a/src/bro.bif +++ b/src/bro.bif @@ -5008,8 +5008,8 @@ function bloomfilter_basic_init%(fp: double, capacity: count, size_t cells = BasicBloomFilter::M(fp, capacity); size_t optimal_k = BasicBloomFilter::K(cells, capacity); - const HashPolicy* hp = HashPolicy::Create(optimal_k, name->CheckString()); - return new BloomFilterVal(new BasicBloomFilter(hp, cells)); + const Hasher* h = Hasher::Create(optimal_k, name->CheckString()); + return new BloomFilterVal(new BasicBloomFilter(h, cells)); %} ## Creates a counting Bloom filter. @@ -5029,11 +5029,11 @@ function bloomfilter_basic_init%(fp: double, capacity: count, function bloomfilter_counting_init%(k: count, cells: count, max: count, name: string &default=""%): opaque of bloomfilter %{ - const HashPolicy* hp = HashPolicy::Create(k, name->CheckString()); + const Hasher* h = Hasher::Create(k, name->CheckString()); uint16 width = 0; while ( max >>= 1 ) ++width; - return new BloomFilterVal(new CountingBloomFilter(hp, cells, width)); + return new BloomFilterVal(new CountingBloomFilter(h, cells, width)); %} ## Adds an element to a Bloom filter. From 79a2e4b5d5c28076a8db1857d3ea6a8891e1ef7c Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Sun, 21 Jul 2013 22:41:48 +0200 Subject: [PATCH 083/118] Implement missing CounterVector functions. --- src/CounterVector.cc | 66 ++++++++++++++++++++++++++++++++++++++------ src/CounterVector.h | 15 ++++++++++ 2 files changed, 73 insertions(+), 8 deletions(-) diff --git a/src/CounterVector.cc b/src/CounterVector.cc index 8ed4c30427..a661492313 100644 --- a/src/CounterVector.cc +++ b/src/CounterVector.cc @@ -1,5 +1,6 @@ #include "CounterVector.h" +#include #include "BitVector.h" #include "Serializer.h" @@ -15,23 +16,66 @@ CounterVector::~CounterVector() bool CounterVector::Increment(size_type cell, count_type value) { - // TODO - assert(! "not yet implemented"); + assert(cell < Size()); + assert(value != 0); + size_t lsb = cell * width_; + if (value >= Max()) + { + bool r = false; + for (size_t i = 0; i < width_; ++i) + if (! (*bits_)[lsb + i]) + { + bits_->Set(lsb + i); + if (! r) + r = true; + } + return r; + } + bool carry = false; + for (size_t i = 0; i < width_; ++i) + { + bool b1 = (*bits_)[lsb + i]; + bool b2 = value & (1 << i); + (*bits_)[lsb + i] ^= b2 != carry; // bit1 ^ bit2 ^ carry + carry = carry ? b1 || b2 : b1 && b2; + } + if (! carry) + return true; + for (size_t i = 0; i < width_; ++i) + bits_->Set(lsb + i); return false; } bool CounterVector::Decrement(size_type cell, count_type value) { - // TODO - assert(! "not yet implemented"); - return false; + assert(cell < Size()); + size_t lsb = cell * width_; + bool success; + while (value --> 0) + { + success = false; + for (size_t i = lsb; i < lsb + width_; ++i) + if ((*bits_)[i]) + { + bits_->Reset(i); + while (i && i > lsb) + bits_->Set(--i); + success = true; + break; + } + } + return success; } CounterVector::count_type CounterVector::Count(size_type cell) const { - // TODO - assert(! 
"not yet implemented"); - return 0; + assert(cell < Size()); + size_t cnt = 0, order = 1; + size_t lsb = cell * width_; + for (size_t i = lsb; i < lsb + width_; ++i, order <<= 1) + if ((*bits_)[i]) + cnt |= order; + return cnt; } CounterVector::size_type CounterVector::Size() const @@ -39,6 +83,12 @@ CounterVector::size_type CounterVector::Size() const return bits_->Blocks() / width_; } +size_t CounterVector::Max() const + { + return std::numeric_limits::max() + >> (std::numeric_limits::digits - width_); + } + bool CounterVector::Serialize(SerialInfo* info) const { return SerialObj::Serialize(info); diff --git a/src/CounterVector.h b/src/CounterVector.h index ecc8fe90e0..868beaca9b 100644 --- a/src/CounterVector.h +++ b/src/CounterVector.h @@ -19,6 +19,8 @@ public: * @param width The number of bits that each cell occupies. * * @param cells The number of cells in the bitvector. + * + * @pre `cells > 0 && width > 0` */ CounterVector(size_t width, size_t cells = 1024); @@ -32,6 +34,8 @@ public: * @param value The value to add to the current counter in *cell*. * * @return `true` if adding *value* to the counter in *cell* succeeded. + * + * @pre `cell < Size()` */ bool Increment(size_type cell, count_type value); @@ -43,6 +47,8 @@ public: * @param value The value to subtract from the current counter in *cell*. * * @return `true` if subtracting *value* from the counter in *cell* succeeded. + * + * @pre `cell < Size()` */ bool Decrement(size_type cell, count_type value); @@ -52,6 +58,8 @@ public: * @param cell The cell index to retrieve the count for. * * @return The counter associated with *cell*. + * + * @pre `cell < Size()` */ count_type Count(size_type cell) const; @@ -62,6 +70,13 @@ public: */ size_type Size() const; + /** + * Computes the maximum counter value. + * + * @return The maximum counter value based on the width. + */ + size_t Max() const; + bool Serialize(SerialInfo* info) const; static CounterVector* Unserialize(UnserialInfo* info); From 7a0240694ec69506b0789029ba48bb56ae703206 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Mon, 22 Jul 2013 14:07:47 +0200 Subject: [PATCH 084/118] Fix and test counting Bloom filter. --- src/BloomFilter.cc | 9 ++++--- src/CounterVector.cc | 5 ++-- src/CounterVector.h | 4 +-- src/bro.bif | 8 +++++- .../btest/Baseline/bifs.bloomfilter/output | 6 +++++ testing/btest/bifs/bloomfilter.bro | 26 ++++++++++++++++++- 6 files changed, 48 insertions(+), 10 deletions(-) diff --git a/src/BloomFilter.cc b/src/BloomFilter.cc index f399bddeca..3c7bac80f1 100644 --- a/src/BloomFilter.cc +++ b/src/BloomFilter.cc @@ -131,9 +131,9 @@ CountingBloomFilter::CountingBloomFilter() CountingBloomFilter::CountingBloomFilter(const Hasher* hasher, size_t cells, size_t width) - : BloomFilter(hasher) + : BloomFilter(hasher), + cells_(new CounterVector(width, cells)) { - cells_ = new CounterVector(width, cells); } @@ -152,10 +152,12 @@ bool CountingBloomFilter::DoUnserialize(UnserialInfo* info) return cells_ != NULL; } +// TODO: Use partitioning in add/count to allow for reusing CMS bounds. + void CountingBloomFilter::AddImpl(const Hasher::digest_vector& h) { for ( size_t i = 0; i < h.size(); ++i ) - cells_->Increment(h[i] % cells_->Size(), 1); + cells_->Increment(h[i] % cells_->Size()); } size_t CountingBloomFilter::CountImpl(const Hasher::digest_vector& h) const @@ -164,7 +166,6 @@ size_t CountingBloomFilter::CountImpl(const Hasher::digest_vector& h) const std::numeric_limits::max(); for ( size_t i = 0; i < h.size(); ++i ) { - // TODO: Use partitioning. 
CounterVector::size_type cnt = cells_->Count(h[i] % cells_->Size()); if ( cnt < min ) min = cnt; diff --git a/src/CounterVector.cc b/src/CounterVector.cc index a661492313..831b95386f 100644 --- a/src/CounterVector.cc +++ b/src/CounterVector.cc @@ -5,7 +5,8 @@ #include "Serializer.h" CounterVector::CounterVector(size_t width, size_t cells) - : bits_(new BitVector(width * cells)), width_(width) + : bits_(new BitVector(width * cells)), + width_(width) { } @@ -80,7 +81,7 @@ CounterVector::count_type CounterVector::Count(size_type cell) const CounterVector::size_type CounterVector::Size() const { - return bits_->Blocks() / width_; + return bits_->Size() / width_; } size_t CounterVector::Max() const diff --git a/src/CounterVector.h b/src/CounterVector.h index 868beaca9b..2d99bb44d8 100644 --- a/src/CounterVector.h +++ b/src/CounterVector.h @@ -37,7 +37,7 @@ public: * * @pre `cell < Size()` */ - bool Increment(size_type cell, count_type value); + bool Increment(size_type cell, count_type value = 1); /** * Decrements a given cell. @@ -50,7 +50,7 @@ public: * * @pre `cell < Size()` */ - bool Decrement(size_type cell, count_type value); + bool Decrement(size_type cell, count_type value = 1); /** * Retrieves the counter of a given cell. diff --git a/src/bro.bif b/src/bro.bif index 71f8c0716f..a33a2248dd 100644 --- a/src/bro.bif +++ b/src/bro.bif @@ -5029,8 +5029,14 @@ function bloomfilter_basic_init%(fp: double, capacity: count, function bloomfilter_counting_init%(k: count, cells: count, max: count, name: string &default=""%): opaque of bloomfilter %{ + if ( max == 0 ) + { + reporter->Error("max counter value must be greater than 0"); + return NULL; + } + const Hasher* h = Hasher::Create(k, name->CheckString()); - uint16 width = 0; + uint16 width = 1; while ( max >>= 1 ) ++width; return new BloomFilterVal(new CountingBloomFilter(h, cells, width)); diff --git a/testing/btest/Baseline/bifs.bloomfilter/output b/testing/btest/Baseline/bifs.bloomfilter/output index 65aaa8b07c..80847a81b9 100644 --- a/testing/btest/Baseline/bifs.bloomfilter/output +++ b/testing/btest/Baseline/bifs.bloomfilter/output @@ -6,3 +6,9 @@ 1 1 1 +1 +2 +3 +3 +2 +3 diff --git a/testing/btest/bifs/bloomfilter.bro b/testing/btest/bifs/bloomfilter.bro index 3ff6a6668e..ab0bf86c22 100644 --- a/testing/btest/bifs/bloomfilter.bro +++ b/testing/btest/bifs/bloomfilter.bro @@ -1,7 +1,7 @@ # @TEST-EXEC: bro -b %INPUT >output # @TEST-EXEC: btest-diff output -event bro_init() +function test_basic_bloom_filter() { # Basic usage with counts. 
local bf_cnt = bloomfilter_basic_init(0.1, 1000); @@ -36,3 +36,27 @@ event bro_init() local bf_bug0 = bloomfilter_basic_init(-0.5, 42); local bf_bug1 = bloomfilter_basic_init(1.1, 42); } + +function test_counting_bloom_filter() + { + local bf = bloomfilter_counting_init(3, 16, 3); + bloomfilter_add(bf, "foo"); + print bloomfilter_lookup(bf, "foo"); # 1 + bloomfilter_add(bf, "foo"); + print bloomfilter_lookup(bf, "foo"); # 2 + bloomfilter_add(bf, "foo"); + print bloomfilter_lookup(bf, "foo"); # 3 + bloomfilter_add(bf, "foo"); + print bloomfilter_lookup(bf, "foo"); # still 3 + + bloomfilter_add(bf, "bar"); + bloomfilter_add(bf, "bar"); + print bloomfilter_lookup(bf, "bar"); # 2 + print bloomfilter_lookup(bf, "foo"); # still 3 + } + +event bro_init() + { + test_basic_bloom_filter(); + test_counting_bloom_filter(); + } From a3c61fe7eb6c43622de17df0e818def20cab7e90 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Mon, 22 Jul 2013 15:39:13 +0200 Subject: [PATCH 085/118] Use half adder for bitwise addition and subtraction. --- src/CounterVector.cc | 53 +++++++++++++++----------------------------- 1 file changed, 18 insertions(+), 35 deletions(-) diff --git a/src/CounterVector.cc b/src/CounterVector.cc index 831b95386f..f46fae1b98 100644 --- a/src/CounterVector.cc +++ b/src/CounterVector.cc @@ -20,52 +20,35 @@ bool CounterVector::Increment(size_type cell, count_type value) assert(cell < Size()); assert(value != 0); size_t lsb = cell * width_; - if (value >= Max()) - { - bool r = false; - for (size_t i = 0; i < width_; ++i) - if (! (*bits_)[lsb + i]) - { - bits_->Set(lsb + i); - if (! r) - r = true; - } - return r; - } bool carry = false; - for (size_t i = 0; i < width_; ++i) - { + for ( size_t i = 0; i < width_; ++i ) + { bool b1 = (*bits_)[lsb + i]; bool b2 = value & (1 << i); - (*bits_)[lsb + i] ^= b2 != carry; // bit1 ^ bit2 ^ carry - carry = carry ? b1 || b2 : b1 && b2; - } - if (! carry) - return true; - for (size_t i = 0; i < width_; ++i) - bits_->Set(lsb + i); - return false; + (*bits_)[lsb + i] = b1 ^ b2 ^ carry; + carry = ( b1 && b2 ) || ( carry && ( b1 != b2 ) ); + } + if ( carry ) + for ( size_t i = 0; i < width_; ++i ) + bits_->Set(lsb + i); + return ! carry; } bool CounterVector::Decrement(size_type cell, count_type value) { assert(cell < Size()); + assert(value != 0); + value = ~value + 1; // A - B := A + ~B + 1 + bool carry = false; size_t lsb = cell * width_; - bool success; - while (value --> 0) + for ( size_t i = 0; i < width_; ++i ) { - success = false; - for (size_t i = lsb; i < lsb + width_; ++i) - if ((*bits_)[i]) - { - bits_->Reset(i); - while (i && i > lsb) - bits_->Set(--i); - success = true; - break; - } + bool b1 = bits_[lsb + i]; + bool b2 = value & (1 << i); + bits_[lsb + i] = b1 ^ b2 ^ carry; + carry = ( b1 && b2 ) || ( carry && ( b1 != b2 ) ); } - return success; + return carry; } CounterVector::count_type CounterVector::Count(size_type cell) const From 9c2f57a9d9d5667d05e43efd3c8541ff9d33382a Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Mon, 22 Jul 2013 16:36:54 +0200 Subject: [PATCH 086/118] Make counter vectors mergeable. 
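Both the Increment/Decrement rewrite above and the Merge introduced below do their per-cell arithmetic as a bitwise ripple-carry (full) adder over the packed bit vector; Increment and Merge additionally clamp a cell to its maximum when the final carry signals overflow. A minimal standalone sketch of the same idea, using a plain std::vector<bool> instead of Bro's BitVector/CounterVector classes (the function name and signature are illustrative only):

    #include <cstddef>
    #include <vector>

    // Saturating add of `value` into the w-bit counter whose least significant
    // bit sits at position `lsb` of a packed bit vector. Returns false when the
    // counter overflows and is clamped to all ones.
    static bool add_saturating(std::vector<bool>& bits, std::size_t lsb,
                               std::size_t w, unsigned long long value)
        {
        bool carry = false;
        for ( std::size_t i = 0; i < w; ++i )
            {
            bool b1 = bits[lsb + i];
            bool b2 = (value >> i) & 1;
            bits[lsb + i] = b1 ^ b2 ^ carry;              // sum bit of the full adder
            carry = (b1 && b2) || (carry && (b1 != b2));  // carry out
            }

        if ( carry )  // overflow: clamp the counter to its maximum
            for ( std::size_t i = 0; i < w; ++i )
                bits[lsb + i] = true;

        return ! carry;
        }

Decrement in the patch reuses the same loop on the two's complement of the operand (A - B = A + ~B + 1), and the Merge that follows runs the addition cell by cell across two vectors of identical width.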
--- src/CounterVector.cc | 42 ++++++++++++++++++++++++++++++++++++++++-- src/CounterVector.h | 27 +++++++++++++++++++++++++++ 2 files changed, 67 insertions(+), 2 deletions(-) diff --git a/src/CounterVector.cc b/src/CounterVector.cc index f46fae1b98..75c62b208a 100644 --- a/src/CounterVector.cc +++ b/src/CounterVector.cc @@ -43,9 +43,9 @@ bool CounterVector::Decrement(size_type cell, count_type value) size_t lsb = cell * width_; for ( size_t i = 0; i < width_; ++i ) { - bool b1 = bits_[lsb + i]; + bool b1 = (*bits_)[lsb + i]; bool b2 = value & (1 << i); - bits_[lsb + i] = b1 ^ b2 ^ carry; + (*bits_)[lsb + i] = b1 ^ b2 ^ carry; carry = ( b1 && b2 ) || ( carry && ( b1 != b2 ) ); } return carry; @@ -67,12 +67,50 @@ CounterVector::size_type CounterVector::Size() const return bits_->Size() / width_; } +size_t CounterVector::Width() const + { + return width_; + } + size_t CounterVector::Max() const { return std::numeric_limits::max() >> (std::numeric_limits::digits - width_); } +CounterVector& CounterVector::Merge(const CounterVector& other) + { + assert(Size() == other.Size()); + assert(Width() == other.Width()); + for ( size_t cell = 0; cell < Size(); ++cell ) + { + size_t lsb = cell * width_; + bool carry = false; + for ( size_t i = 0; i < width_; ++i ) + { + bool b1 = (*bits_)[lsb + i]; + bool b2 = (*other.bits_)[lsb + i]; + (*bits_)[lsb + i] = b1 ^ b2 ^ carry; + carry = ( b1 && b2 ) || ( carry && ( b1 != b2 ) ); + } + if ( carry ) + for ( size_t i = 0; i < width_; ++i ) + bits_->Set(lsb + i); + } + return *this; + } + +CounterVector& CounterVector::operator|=(const CounterVector& other) +{ + return Merge(other); +} + +CounterVector operator|(const CounterVector& x, const CounterVector& y) +{ + CounterVector cv(x); + return cv |= y; +} + bool CounterVector::Serialize(SerialInfo* info) const { return SerialObj::Serialize(info); diff --git a/src/CounterVector.h b/src/CounterVector.h index 2d99bb44d8..4ab221ff6b 100644 --- a/src/CounterVector.h +++ b/src/CounterVector.h @@ -70,6 +70,13 @@ public: */ size_type Size() const; + /** + * Retrieves the counter width. + * + * @return The number of bits per counter. + */ + size_t Width() const; + /** * Computes the maximum counter value. * @@ -77,6 +84,26 @@ public: */ size_t Max() const; + /** + * Merges another counter vector into this instance by *adding* the counters + * of each cells. + * + * @param other The counter vector to merge into this instance. + * + * @return A reference to `*this`. + * + * @pre `Size() == other.Size() && Width() == other.Width()` + */ + CounterVector& Merge(const CounterVector& other); + + /** + * An alias for ::Merge. + */ + CounterVector& operator|=(const CounterVector& other); + + friend CounterVector operator|(const CounterVector& x, + const CounterVector& y); + bool Serialize(SerialInfo* info) const; static CounterVector* Unserialize(UnserialInfo* info); From eb64f5f9616e84295bc17537e8db57ae4f089c41 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Mon, 22 Jul 2013 18:03:55 +0200 Subject: [PATCH 087/118] Make hash functions equality comparable. --- src/H3.h | 12 ++++++ src/Hasher.cc | 101 +++++++++++++++++++++++++++++++------------------- src/Hasher.h | 18 +++++++++ 3 files changed, 93 insertions(+), 38 deletions(-) diff --git a/src/H3.h b/src/H3.h index e2dc865147..123dd6f374 100644 --- a/src/H3.h +++ b/src/H3.h @@ -58,6 +58,7 @@ #define H3_H #include +#include // The number of values representable by a byte. 
#define H3_BYTE_RANGE (UCHAR_MAX+1) @@ -112,6 +113,17 @@ public: return result; } + + friend bool operator==(const H3& x, const H3& y) + { + return ! std::memcmp(x.byte_lookup, y.byte_lookup, N * H3_BYTE_RANGE); + } + + friend bool operator!=(const H3& x, const H3& y) + { + return ! (x == y); + } + private: T byte_lookup[N][H3_BYTE_RANGE]; }; diff --git a/src/Hasher.cc b/src/Hasher.cc index 045adcd174..7a8d9a67e0 100644 --- a/src/Hasher.cc +++ b/src/Hasher.cc @@ -8,56 +8,69 @@ Hasher::UHF::UHF(size_t seed, const std::string& extra) } Hasher::digest Hasher::UHF::hash(const void* x, size_t n) const - { - assert(n <= UHASH_KEY_SIZE); - return n == 0 ? 0 : h_(x, n); - } + { + assert(n <= UHASH_KEY_SIZE); + return n == 0 ? 0 : h_(x, n); + } size_t Hasher::UHF::compute_seed(size_t seed, const std::string& extra) - { - u_char buf[SHA256_DIGEST_LENGTH]; - SHA256_CTX ctx; - sha256_init(&ctx); - if ( extra.empty() ) + { + u_char buf[SHA256_DIGEST_LENGTH]; + SHA256_CTX ctx; + sha256_init(&ctx); + if ( extra.empty() ) { unsigned int first_seed = initial_seed(); sha256_update(&ctx, &first_seed, sizeof(first_seed)); } else { - sha256_update(&ctx, extra.c_str(), extra.size()); + sha256_update(&ctx, extra.c_str(), extra.size()); + } + sha256_update(&ctx, &seed, sizeof(seed)); + sha256_final(&ctx, buf); + // Take the first sizeof(size_t) bytes as seed. + return *reinterpret_cast(buf); } - sha256_update(&ctx, &seed, sizeof(seed)); - sha256_final(&ctx, buf); - // Take the first sizeof(size_t) bytes as seed. - return *reinterpret_cast(buf); - } Hasher* Hasher::Create(size_t k, const std::string& name) - { - return new DefaultHasher(k, name); - } + { + return new DefaultHasher(k, name); + } Hasher::Hasher(size_t k, const std::string& name) - : k_(k), name_(name) + : k_(k), name_(name) { } DefaultHasher::DefaultHasher(size_t k, const std::string& name) - : Hasher(k, name) - { - for ( size_t i = 0; i < k; ++i ) - hash_functions_.push_back(UHF(i, name)); - } + : Hasher(k, name) + { + for ( size_t i = 0; i < k; ++i ) + hash_functions_.push_back(UHF(i, name)); + } Hasher::digest_vector DefaultHasher::Hash(const void* x, size_t n) const - { - digest_vector h(K(), 0); - for ( size_t i = 0; i < h.size(); ++i ) - h[i] = hash_functions_[i](x, n); - return h; - } + { + digest_vector h(K(), 0); + for ( size_t i = 0; i < h.size(); ++i ) + h[i] = hash_functions_[i](x, n); + return h; + } + +DefaultHasher* DefaultHasher::Clone() const + { + return new DefaultHasher(*this); + } + +bool DefaultHasher::Equals(const Hasher* other) const /* final */ + { + if ( typeid(*this) != typeid(*other) ) + return false; + const DefaultHasher* o = static_cast(other); + return hash_functions_ == o->hash_functions_; + } DoubleHasher::DoubleHasher(size_t k, const std::string& name) : Hasher(k, name), @@ -67,13 +80,25 @@ DoubleHasher::DoubleHasher(size_t k, const std::string& name) } Hasher::digest_vector DoubleHasher::Hash(const void* x, size_t n) const - { - digest h1 = h1_(x, n); - digest h2 = h2_(x, n); - digest_vector h(K(), 0); - for ( size_t i = 0; i < h.size(); ++i ) - h[i] = h1 + i * h2; - return h; - } + { + digest h1 = h1_(x, n); + digest h2 = h2_(x, n); + digest_vector h(K(), 0); + for ( size_t i = 0; i < h.size(); ++i ) + h[i] = h1 + i * h2; + return h; + } +DoubleHasher* DoubleHasher::Clone() const + { + return new DoubleHasher(*this); + } + +bool DoubleHasher::Equals(const Hasher* other) const /* final */ + { + if ( typeid(*this) != typeid(*other) ) + return false; + const DoubleHasher* o = static_cast(other); + return h1_ == 
o->h1_ && h2_ == o->h2_; + } diff --git a/src/Hasher.h b/src/Hasher.h index 8d0af6b03f..12393e7217 100644 --- a/src/Hasher.h +++ b/src/Hasher.h @@ -31,6 +31,10 @@ public: virtual digest_vector Hash(const void* x, size_t n) const = 0; + virtual Hasher* Clone() const = 0; + + virtual bool Equals(const Hasher* other) const = 0; + size_t K() const { return k_; } const std::string& Name() const { return name_; } @@ -64,6 +68,16 @@ protected: return hash(x, n); } + friend bool operator==(const UHF& x, const UHF& y) + { + return x.h_ == y.h_; + } + + friend bool operator!=(const UHF& x, const UHF& y) + { + return ! (x == y); + } + digest hash(const void* x, size_t n) const; private: @@ -87,6 +101,8 @@ public: DefaultHasher(size_t k, const std::string& name); virtual digest_vector Hash(const void* x, size_t n) const /* final */; + virtual DefaultHasher* Clone() const /* final */; + virtual bool Equals(const Hasher* other) const /* final */; private: std::vector hash_functions_; @@ -100,6 +116,8 @@ public: DoubleHasher(size_t k, const std::string& name); virtual digest_vector Hash(const void* x, size_t n) const /* final */; + virtual DoubleHasher* Clone() const /* final */; + virtual bool Equals(const Hasher* other) const /* final */; private: UHF h1_; From a39f980cd493e64a6bb4016c47923e8754b059dc Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Mon, 22 Jul 2013 18:11:12 +0200 Subject: [PATCH 088/118] Implement and test Bloom filter merging. --- src/BloomFilter.cc | 22 ++++++++++++++---- src/BloomFilter.h | 1 - src/CounterVector.cc | 6 +++++ src/CounterVector.h | 8 +++++++ src/Hasher.cc | 4 ++-- src/OpaqueVal.cc | 2 +- src/OpaqueVal.h | 21 ++++++++++++++--- .../btest/Baseline/bifs.bloomfilter/output | 7 ++++++ testing/btest/bifs/bloomfilter.bro | 23 ++++++++++++++++++- 9 files changed, 81 insertions(+), 13 deletions(-) diff --git a/src/BloomFilter.cc b/src/BloomFilter.cc index 3c7bac80f1..889c7bafe1 100644 --- a/src/BloomFilter.cc +++ b/src/BloomFilter.cc @@ -70,8 +70,13 @@ size_t BasicBloomFilter::K(size_t cells, size_t capacity) BasicBloomFilter* BasicBloomFilter::Merge(const BasicBloomFilter* x, const BasicBloomFilter* y) { - // TODO: Ensure that x and y use the same Hasher before proceeding. + if ( ! x->hasher_->Equals(y->hasher_) ) + { + reporter->InternalError("incompatible hashers during Bloom filter merge"); + return NULL; + } BasicBloomFilter* result = new BasicBloomFilter(); + result->hasher_ = x->hasher_->Clone(); result->bits_ = new BitVector(*x->bits_ | *y->bits_); return result; } @@ -119,10 +124,17 @@ size_t BasicBloomFilter::CountImpl(const Hasher::digest_vector& h) const CountingBloomFilter* CountingBloomFilter::Merge(const CountingBloomFilter* x, const CountingBloomFilter* y) -{ - assert(! "not yet implemented"); - return NULL; -} + { + if ( ! 
x->hasher_->Equals(y->hasher_) ) + { + reporter->InternalError("incompatible hashers during Bloom filter merge"); + return NULL; + } + CountingBloomFilter* result = new CountingBloomFilter(); + result->hasher_ = x->hasher_->Clone(); + result->cells_ = new CounterVector(*x->cells_ | *y->cells_); + return result; + } CountingBloomFilter::CountingBloomFilter() : cells_(NULL) diff --git a/src/BloomFilter.h b/src/BloomFilter.h index 92f15c6070..070aa2dc25 100644 --- a/src/BloomFilter.h +++ b/src/BloomFilter.h @@ -57,7 +57,6 @@ protected: virtual void AddImpl(const Hasher::digest_vector& hashes) = 0; virtual size_t CountImpl(const Hasher::digest_vector& hashes) const = 0; -private: const Hasher* hasher_; }; diff --git a/src/CounterVector.cc b/src/CounterVector.cc index 75c62b208a..cf3083de9e 100644 --- a/src/CounterVector.cc +++ b/src/CounterVector.cc @@ -10,6 +10,12 @@ CounterVector::CounterVector(size_t width, size_t cells) { } +CounterVector::CounterVector(const CounterVector& other) + : bits_(new BitVector(*other.bits_)), + width_(other.width_) + { + } + CounterVector::~CounterVector() { delete bits_; diff --git a/src/CounterVector.h b/src/CounterVector.h index 4ab221ff6b..eced5956d4 100644 --- a/src/CounterVector.h +++ b/src/CounterVector.h @@ -9,6 +9,7 @@ class BitVector; * A vector of counters, each of which have a fixed number of bits. */ class CounterVector : public SerialObj { + CounterVector& operator=(const CounterVector&); public: typedef size_t size_type; typedef uint64 count_type; @@ -24,6 +25,13 @@ public: */ CounterVector(size_t width, size_t cells = 1024); + /** + * Copy-constructs a counter vector. + * + * @param other The counter vector to copy. + */ + CounterVector(const CounterVector& other); + ~CounterVector(); /** diff --git a/src/Hasher.cc b/src/Hasher.cc index 7a8d9a67e0..2a889c7e09 100644 --- a/src/Hasher.cc +++ b/src/Hasher.cc @@ -64,7 +64,7 @@ DefaultHasher* DefaultHasher::Clone() const return new DefaultHasher(*this); } -bool DefaultHasher::Equals(const Hasher* other) const /* final */ +bool DefaultHasher::Equals(const Hasher* other) const { if ( typeid(*this) != typeid(*other) ) return false; @@ -94,7 +94,7 @@ DoubleHasher* DoubleHasher::Clone() const return new DoubleHasher(*this); } -bool DoubleHasher::Equals(const Hasher* other) const /* final */ +bool DoubleHasher::Equals(const Hasher* other) const { if ( typeid(*this) != typeid(*other) ) return false; diff --git a/src/OpaqueVal.cc b/src/OpaqueVal.cc index 5a673c4a40..36038d679a 100644 --- a/src/OpaqueVal.cc +++ b/src/OpaqueVal.cc @@ -1,6 +1,5 @@ #include "OpaqueVal.h" -#include "BloomFilter.h" #include "NetVar.h" #include "Reporter.h" #include "Serializer.h" @@ -587,6 +586,7 @@ BloomFilterVal* BloomFilterVal::Merge(const BloomFilterVal* x, else if ( (result = DoMerge(x, y)) ) return result; + reporter->InternalError("failed to merge Bloom filters"); return NULL; } diff --git a/src/OpaqueVal.h b/src/OpaqueVal.h index 2362fdacfc..22c3dbfade 100644 --- a/src/OpaqueVal.h +++ b/src/OpaqueVal.h @@ -3,6 +3,7 @@ #ifndef OPAQUEVAL_H #define OPAQUEVAL_H +#include "BloomFilter.h" #include "RandTest.h" #include "Val.h" #include "digest.h" @@ -137,9 +138,23 @@ private: static BloomFilterVal* DoMerge(const BloomFilterVal* x, const BloomFilterVal* y) { - const T* a = dynamic_cast(x->bloom_filter_); - const T* b = dynamic_cast(y->bloom_filter_); - return a && b ? 
new BloomFilterVal(T::Merge(a, b)) : NULL; + if ( typeid(*x->bloom_filter_) != typeid(*y->bloom_filter_) ) + { + reporter->InternalError("cannot merge different Bloom filter types"); + return NULL; + } + if ( typeid(T) != typeid(*x->bloom_filter_) ) + return NULL; + const T* a = static_cast(x->bloom_filter_); + const T* b = static_cast(y->bloom_filter_); + BloomFilterVal* merged = new BloomFilterVal(T::Merge(a, b)); + assert(merged); + if ( ! merged->Typify(x->Type()) ) + { + reporter->InternalError("failed to set type on merged Bloom filter"); + return NULL; + } + return merged; } BroType* type_; diff --git a/testing/btest/Baseline/bifs.bloomfilter/output b/testing/btest/Baseline/bifs.bloomfilter/output index 80847a81b9..4fe2ae1ecc 100644 --- a/testing/btest/Baseline/bifs.bloomfilter/output +++ b/testing/btest/Baseline/bifs.bloomfilter/output @@ -7,8 +7,15 @@ 1 1 1 +1 +1 +1 +1 2 3 3 2 3 +3 +3 +2 diff --git a/testing/btest/bifs/bloomfilter.bro b/testing/btest/bifs/bloomfilter.bro index ab0bf86c22..f69ddbda0c 100644 --- a/testing/btest/bifs/bloomfilter.bro +++ b/testing/btest/bifs/bloomfilter.bro @@ -35,11 +35,21 @@ function test_basic_bloom_filter() # Invalid parameters. local bf_bug0 = bloomfilter_basic_init(-0.5, 42); local bf_bug1 = bloomfilter_basic_init(1.1, 42); + + # Merging + local bf_cnt2 = bloomfilter_basic_init(0.1, 1000); + bloomfilter_add(bf_cnt2, 42); + bloomfilter_add(bf_cnt, 100); + local bf_merged = bloomfilter_merge(bf_cnt, bf_cnt2); + print bloomfilter_lookup(bf_merged, 42); + print bloomfilter_lookup(bf_merged, 84); + print bloomfilter_lookup(bf_merged, 100); + print bloomfilter_lookup(bf_merged, 168); } function test_counting_bloom_filter() { - local bf = bloomfilter_counting_init(3, 16, 3); + local bf = bloomfilter_counting_init(3, 32, 3); bloomfilter_add(bf, "foo"); print bloomfilter_lookup(bf, "foo"); # 1 bloomfilter_add(bf, "foo"); @@ -49,10 +59,21 @@ function test_counting_bloom_filter() bloomfilter_add(bf, "foo"); print bloomfilter_lookup(bf, "foo"); # still 3 + bloomfilter_add(bf, "bar"); bloomfilter_add(bf, "bar"); print bloomfilter_lookup(bf, "bar"); # 2 print bloomfilter_lookup(bf, "foo"); # still 3 + + # Merging + local bf2 = bloomfilter_counting_init(3, 32, 3); + bloomfilter_add(bf2, "baz"); + bloomfilter_add(bf2, "baz"); + bloomfilter_add(bf2, "bar"); + local bf_merged = bloomfilter_merge(bf, bf2); + print bloomfilter_lookup(bf_merged, "foo"); + print bloomfilter_lookup(bf_merged, "bar"); + print bloomfilter_lookup(bf_merged, "baz"); } event bro_init() From 5c3bf14d168cca9af75e0ac642de8049f89cf525 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Mon, 22 Jul 2013 14:02:56 -0400 Subject: [PATCH 089/118] Fixed a scriptland state issue that manifested especially badly on proxies. 
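The merging support added above is only well defined when both filters were created with identical hash functions, which is why both Merge implementations check Hasher::Equals() before combining cells. For basic filters the union of the represented sets is a bitwise OR of the bit vectors; counting filters add the cells instead, saturating at the maximum counter value. A minimal sketch of the basic case with plain standard containers (TinyBloom and its fields are illustrative stand-ins, not the Bro classes):

    #include <cstddef>
    #include <stdexcept>
    #include <string>
    #include <vector>

    // A basic Bloom filter reduced to its essentials: the bit vector plus an
    // identifier for the hash-function family it was built with.
    struct TinyBloom
        {
        std::vector<bool> bits;
        std::string hasher_id;  // stand-in for the Hasher::Equals() check
        };

    // Cell-wise OR yields the filter for the union of the two underlying sets,
    // provided both filters use the same hash functions and the same size.
    static TinyBloom merge_basic(const TinyBloom& x, const TinyBloom& y)
        {
        if ( x.hasher_id != y.hasher_id || x.bits.size() != y.bits.size() )
            throw std::runtime_error("incompatible Bloom filters");

        TinyBloom result;
        result.hasher_id = x.hasher_id;
        result.bits.resize(x.bits.size());

        for ( std::size_t i = 0; i < result.bits.size(); ++i )
            result.bits[i] = x.bits[i] || y.bits[i];

        return result;
        }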
--- scripts/base/protocols/irc/dcc-send.bro | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/base/protocols/irc/dcc-send.bro b/scripts/base/protocols/irc/dcc-send.bro index 0a7f27e438..3194766946 100644 --- a/scripts/base/protocols/irc/dcc-send.bro +++ b/scripts/base/protocols/irc/dcc-send.bro @@ -185,5 +185,6 @@ event expected_connection_seen(c: connection, a: Analyzer::Tag) &priority=10 event connection_state_remove(c: connection) &priority=-5 { - delete dcc_expected_transfers[c$id$resp_h, c$id$resp_p]; + if ( [c$id$resp_h, c$id$resp_p] in dcc_expected_transfers ) + delete dcc_expected_transfers[c$id$resp_h, c$id$resp_p]; } From 325f0c2a3f087508dc0817739b9c312bcc5873d5 Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Mon, 22 Jul 2013 14:15:35 -0500 Subject: [PATCH 090/118] Coverage test fixes and whitespace/doc tweaks. --- doc/scripts/DocSourcesList.cmake | 3 ++ scripts/base/utils/active-http.bro | 26 ++++++------ scripts/base/utils/exec.bro | 40 +++++++++---------- .../canonified_loaded_scripts.log | 13 +++--- 4 files changed, 42 insertions(+), 40 deletions(-) diff --git a/doc/scripts/DocSourcesList.cmake b/doc/scripts/DocSourcesList.cmake index 529b03ca83..bd264bfcb4 100644 --- a/doc/scripts/DocSourcesList.cmake +++ b/doc/scripts/DocSourcesList.cmake @@ -164,9 +164,12 @@ rest_target(${psd} base/protocols/ssl/main.bro) rest_target(${psd} base/protocols/ssl/mozilla-ca-list.bro) rest_target(${psd} base/protocols/syslog/consts.bro) rest_target(${psd} base/protocols/syslog/main.bro) +rest_target(${psd} base/utils/active-http.bro) rest_target(${psd} base/utils/addrs.bro) rest_target(${psd} base/utils/conn-ids.bro) +rest_target(${psd} base/utils/dir.bro) rest_target(${psd} base/utils/directions-and-hosts.bro) +rest_target(${psd} base/utils/exec.bro) rest_target(${psd} base/utils/files.bro) rest_target(${psd} base/utils/numbers.bro) rest_target(${psd} base/utils/paths.bro) diff --git a/scripts/base/utils/active-http.bro b/scripts/base/utils/active-http.bro index 5522cc108a..3f475a378b 100644 --- a/scripts/base/utils/active-http.bro +++ b/scripts/base/utils/active-http.bro @@ -1,21 +1,21 @@ -##! A module for performing active HTTP requests and +##! A module for performing active HTTP requests and ##! getting the reply at runtime. @load ./exec module ActiveHTTP; - + export { ## The default timeout for HTTP requests. const default_max_time = 1min &redef; - + ## The default HTTP method/verb to use for requests. const default_method = "GET" &redef; - - type Response: record { + + type Response: record { ## Numeric response code from the server. code: count; - ## String response messgae from the server. + ## String response message from the server. msg: string; ## Full body of the response. body: string &optional; @@ -29,24 +29,24 @@ export { ## The HTTP method/verb to use for the request. method: string &default=default_method; ## Data to send to the server in the client body. Keep in - ## mind that you will probably need to set the $method field + ## mind that you will probably need to set the *method* field ## to "POST" or "PUT". client_data: string &optional; - ## Arbitrary headers to pass to the server. Some headers + ## Arbitrary headers to pass to the server. Some headers ## will be included by libCurl. #custom_headers: table[string] of string &optional; ## Timeout for the request. max_time: interval &default=default_max_time; - ## Additional curl command line arguments. Be very careful + ## Additional curl command line arguments. 
Be very careful ## with this option since shell injection could take place ## if careful handling of untrusted data is not applied. addl_curl_args: string &optional; }; ## Perform an HTTP request according to the :bro:type:`Request` record. - ## This is an asynchronous function and must be called within a "when" + ## This is an asynchronous function and must be called within a "when" ## statement. - ## + ## ## req: A record instance representing all options for an HTTP request. ## ## Returns: A record with the full response message. @@ -55,7 +55,7 @@ export { function request2curl(r: Request, bodyfile: string, headersfile: string): string { - local cmd = fmt("curl -s -g -o \"%s\" -D \"%s\" -X \"%s\"", + local cmd = fmt("curl -s -g -o \"%s\" -D \"%s\" -X \"%s\"", str_shell_escape(bodyfile), str_shell_escape(headersfile), str_shell_escape(r$method)); @@ -91,7 +91,7 @@ function request(req: Request): ActiveHTTP::Response # If there is no response line then nothing else will work either. if ( ! (result?$files && headersfile in result$files) ) Reporter::error(fmt("There was a failure when requesting \"%s\" with ActiveHTTP.", req$url)); - + local headers = result$files[headersfile]; for ( i in headers ) { diff --git a/scripts/base/utils/exec.bro b/scripts/base/utils/exec.bro index 45cd8cb287..f896a68064 100644 --- a/scripts/base/utils/exec.bro +++ b/scripts/base/utils/exec.bro @@ -1,6 +1,4 @@ ##! A module for executing external command line programs. -##! This requires code that is still in topic branches and -##! definitely won't currently work on any released version of Bro. @load base/frameworks/input @@ -8,15 +6,13 @@ module Exec; export { type Command: record { - ## The command line to execute. - ## Use care to avoid injection attacks! + ## The command line to execute. Use care to avoid injection attacks. + ## I.e. if the command uses untrusted/variable data, sanitize it. cmd: string; - ## Provide standard in to the program as a - ## string. + ## Provide standard in to the program as a string. stdin: string &default=""; - ## If additional files are required to be read - ## in as part of the output of the command they - ## can be defined here. + ## If additional files are required to be read in as part of the output + ## of the command they can be defined here. read_files: set[string] &optional; }; @@ -27,7 +23,7 @@ export { signal_exit: bool &default=F; ## Each line of standard out. stdout: vector of string &optional; - ## Each line of standard error. + ## Each line of standard error. stderr: vector of string &optional; ## If additional files were requested to be read in ## the content of the files will be available here. @@ -35,7 +31,7 @@ export { }; ## Function for running command line programs and getting - ## output. This is an asynchronous function which is meant + ## output. This is an asynchronous function which is meant ## to be run with the `when` statement. ## ## cmd: The command to run. Use care to avoid injection attacks! @@ -56,12 +52,12 @@ redef record Command += { global results: table[string] of Result = table(); global finished_commands: set[string]; global currently_tracked_files: set[string] = set(); -type OneLine: record { +type OneLine: record { s: string; is_stderr: bool; }; -type FileLine: record { +type FileLine: record { s: string; }; @@ -93,7 +89,7 @@ event Exec::file_line(description: Input::EventDescription, tpe: Input::Event, s local result = results[name]; if ( ! 
result?$files ) result$files = table(); - + if ( track_file !in result$files ) result$files[track_file] = vector(s); else @@ -136,16 +132,16 @@ function run(cmd: Command): Result } } - local config_strings: table[string] of string = { + local config_strings: table[string] of string = { ["stdin"] = cmd$stdin, ["read_stderr"] = "1", }; - Input::add_event([$name=cmd$uid, - $source=fmt("%s |", cmd$cmd), - $reader=Input::READER_RAW, - $fields=Exec::OneLine, - $ev=Exec::line, - $want_record=F, + Input::add_event([$name=cmd$uid, + $source=fmt("%s |", cmd$cmd), + $reader=Input::READER_RAW, + $fields=Exec::OneLine, + $ev=Exec::line, + $want_record=F, $config=config_strings]); return when ( cmd$uid in finished_commands ) @@ -164,4 +160,4 @@ event bro_done() { system(fmt("rm \"%s\"", str_shell_escape(fname))); } - } \ No newline at end of file + } diff --git a/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log b/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log index 999fd7c841..37f1c739f8 100644 --- a/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log +++ b/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log @@ -3,7 +3,7 @@ #empty_field (empty) #unset_field - #path loaded_scripts -#open 2013-07-10-21-18-31 +#open 2013-07-22-16-01-22 #fields name #types string scripts/base/init-bare.bro @@ -90,12 +90,17 @@ scripts/base/init-bare.bro scripts/base/init-default.bro scripts/base/utils/site.bro scripts/base/utils/patterns.bro + scripts/base/utils/active-http.bro + scripts/base/utils/exec.bro scripts/base/utils/addrs.bro scripts/base/utils/conn-ids.bro + scripts/base/utils/dir.bro + scripts/base/frameworks/reporter/__load__.bro + scripts/base/frameworks/reporter/main.bro + scripts/base/utils/paths.bro scripts/base/utils/directions-and-hosts.bro scripts/base/utils/files.bro scripts/base/utils/numbers.bro - scripts/base/utils/paths.bro scripts/base/utils/queue.bro scripts/base/utils/strings.bro scripts/base/utils/thresholds.bro @@ -129,8 +134,6 @@ scripts/base/init-default.bro scripts/base/frameworks/intel/__load__.bro scripts/base/frameworks/intel/main.bro scripts/base/frameworks/intel/input.bro - scripts/base/frameworks/reporter/__load__.bro - scripts/base/frameworks/reporter/main.bro scripts/base/frameworks/sumstats/__load__.bro scripts/base/frameworks/sumstats/main.bro scripts/base/frameworks/sumstats/plugins/__load__.bro @@ -195,4 +198,4 @@ scripts/base/init-default.bro scripts/base/protocols/tunnels/__load__.bro scripts/base/misc/find-checksum-offloading.bro scripts/policy/misc/loaded-scripts.bro -#close 2013-07-10-21-18-31 +#close 2013-07-22-16-01-22 From 636914b8f12a27145ce2fcb2b4e1e4be8f6ad381 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Mon, 22 Jul 2013 17:01:31 -0400 Subject: [PATCH 091/118] Some tests work now (at least they all don't fail anymore!) 
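As the documentation cleanup above notes, Exec::run() resolves asynchronously through the raw input reader (and ActiveHTTP::request() builds on it in the same way), so callers have to invoke it from inside a `when` statement. A minimal usage sketch in Bro script; the command string and the handling of the optional stdout field are illustrative only, not part of the patch:

    # Run an external command and print its standard output once it finishes.
    event bro_init()
        {
        when ( local result = Exec::run([$cmd="ls -1 /tmp"]) )
            {
            if ( result?$stdout )
                {
                for ( i in result$stdout )
                    print result$stdout[i];
                }
            }
        }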
--- testing/btest/btest.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testing/btest/btest.cfg b/testing/btest/btest.cfg index 4a13833094..7ccf99eea8 100644 --- a/testing/btest/btest.cfg +++ b/testing/btest/btest.cfg @@ -7,7 +7,7 @@ IgnoreFiles = *.tmp *.swp #* *.trace .DS_Store [environment] BROPATH=`bash -c %(testbase)s/../../build/bro-path-dev` -BROMAGIC=%(testbase)s/../../magic +BROMAGIC=%(testbase)s/../../magic/database BRO_SEED_FILE=%(testbase)s/random.seed TZ=UTC LC_ALL=C From f098b17429151d2169aff30ead87801146fb376f Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Tue, 23 Jul 2013 11:18:49 -0400 Subject: [PATCH 092/118] A few test updates. --- scripts/base/protocols/irc/files.bro | 3 -- .../policy/frameworks/files/detect-MHR.bro | 2 +- .../Baseline/core.tunnels.ayiya/http.log | 10 +++--- .../canonified_loaded_scripts.log | 31 ++++++++++--------- .../out | 3 +- .../out | 15 +++++++++ .../http.log | 8 ++--- .../notice.log | 10 +++--- .../smtp_entities.log | 12 ------- .../scripts/base/protocols/smtp/mime.test | 6 ---- testing/scripts/file-analysis-test.bro | 18 +++++------ 11 files changed, 57 insertions(+), 61 deletions(-) delete mode 100644 testing/btest/Baseline/scripts.base.protocols.smtp.mime/smtp_entities.log delete mode 100644 testing/btest/scripts/base/protocols/smtp/mime.test diff --git a/scripts/base/protocols/irc/files.bro b/scripts/base/protocols/irc/files.bro index a6321d3f2f..7e077c8331 100644 --- a/scripts/base/protocols/irc/files.bro +++ b/scripts/base/protocols/irc/files.bro @@ -16,9 +16,6 @@ export { function get_file_handle(c: connection, is_orig: bool): string { - if ( [c$id$resp_h, c$id$resp_p] !in dcc_expected_transfers ) - return ""; - return cat(Analyzer::ANALYZER_IRC_DATA, c$start_time, c$id, is_orig); } diff --git a/scripts/policy/frameworks/files/detect-MHR.bro b/scripts/policy/frameworks/files/detect-MHR.bro index 71d73217e0..8a2e33b7f4 100644 --- a/scripts/policy/frameworks/files/detect-MHR.bro +++ b/scripts/policy/frameworks/files/detect-MHR.bro @@ -47,7 +47,7 @@ event file_hash(f: fa_file, kind: string, hash: string) local readable_first_detected = strftime("%Y-%m-%d %H:%M:%S", mhr_first_detected); if ( mhr_detect_rate >= notice_threshold ) { - local message = fmt("Detection rate: %d%% Last seen: %s", mhr_detect_rate, readable_first_detected); + local message = fmt("Malware Hash Registry Detection rate: %d%% Last seen: %s", mhr_detect_rate, readable_first_detected); local virustotal_url = fmt("https://www.virustotal.com/en/file/%s/analysis/", hash); NOTICE([$note=Match, $msg=message, $sub=virustotal_url, $f=f]); } diff --git a/testing/btest/Baseline/core.tunnels.ayiya/http.log b/testing/btest/Baseline/core.tunnels.ayiya/http.log index cd49c4cc89..04692a3547 100644 --- a/testing/btest/Baseline/core.tunnels.ayiya/http.log +++ b/testing/btest/Baseline/core.tunnels.ayiya/http.log @@ -3,10 +3,10 @@ #empty_field (empty) #unset_field - #path http -#open 2013-05-21-21-11-20 -#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied mime_type md5 extracted_request_files extracted_response_files -#types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] string string vector[string] vector[string] -1257655301.652206 5OKnoww6xl4 2001:4978:f:4c::2 53382 2001:4860:b002::68 80 1 GET 
ipv6.google.com / - Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en; rv:1.9.0.15pre) Gecko/2009091516 Camino/2.0b4 (like Firefox/3.0.15pre) 0 10102 200 OK - - - (empty) - - - text/html - - - +#open 2013-07-23-05-12-58 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied orig_fuids orig_mime_types resp_fuids resp_mime_types +#types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] vector[string] vector[string] vector[string] vector[string] +1257655301.652206 5OKnoww6xl4 2001:4978:f:4c::2 53382 2001:4860:b002::68 80 1 GET ipv6.google.com / - Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en; rv:1.9.0.15pre) Gecko/2009091516 Camino/2.0b4 (like Firefox/3.0.15pre) 0 10102 200 OK - - - (empty) - - - - - meGKu6goEyd application/octet-stream 1257655302.514424 5OKnoww6xl4 2001:4978:f:4c::2 53382 2001:4860:b002::68 80 2 GET ipv6.google.com /csi?v=3&s=webhp&action=&tran=undefined&e=17259,19771,21517,21766,21887,22212&ei=BUz2Su7PMJTglQfz3NzCAw&rt=prt.77,xjs.565,ol.645 http://ipv6.google.com/ Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en; rv:1.9.0.15pre) Gecko/2009091516 Camino/2.0b4 (like Firefox/3.0.15pre) 0 0 204 No Content - - - (empty) - - - - - - - 1257655303.603569 5OKnoww6xl4 2001:4978:f:4c::2 53382 2001:4860:b002::68 80 3 GET ipv6.google.com /gen_204?atyp=i&ct=fade&cad=1254&ei=BUz2Su7PMJTglQfz3NzCAw&zx=1257655303600 http://ipv6.google.com/ Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en; rv:1.9.0.15pre) Gecko/2009091516 Camino/2.0b4 (like Firefox/3.0.15pre) 0 0 204 No Content - - - (empty) - - - - - - - -#close 2013-05-21-21-11-20 +#close 2013-07-23-05-12-58 diff --git a/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log b/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log index 999fd7c841..f67d4b6158 100644 --- a/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log +++ b/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log @@ -3,7 +3,7 @@ #empty_field (empty) #unset_field - #path loaded_scripts -#open 2013-07-10-21-18-31 +#open 2013-07-23-05-48-10 #fields name #types string scripts/base/init-bare.bro @@ -84,12 +84,12 @@ scripts/base/init-bare.bro scripts/base/frameworks/analyzer/main.bro scripts/base/frameworks/packet-filter/utils.bro build/scripts/base/bif/analyzer.bif.bro - scripts/base/frameworks/file-analysis/__load__.bro - scripts/base/frameworks/file-analysis/main.bro + scripts/base/frameworks/files/__load__.bro + scripts/base/frameworks/files/main.bro build/scripts/base/bif/file_analysis.bif.bro + scripts/base/utils/site.bro + scripts/base/utils/patterns.bro scripts/base/init-default.bro - scripts/base/utils/site.bro - scripts/base/utils/patterns.bro scripts/base/utils/addrs.bro scripts/base/utils/conn-ids.bro scripts/base/utils/directions-and-hosts.bro @@ -157,8 +157,8 @@ scripts/base/init-default.bro scripts/base/protocols/ftp/__load__.bro scripts/base/protocols/ftp/utils-commands.bro scripts/base/protocols/ftp/main.bro - scripts/base/protocols/ftp/file-analysis.bro - scripts/base/protocols/ftp/file-extract.bro + scripts/base/protocols/ftp/utils.bro + scripts/base/protocols/ftp/files.bro scripts/base/protocols/ftp/gridftp.bro scripts/base/protocols/ssl/__load__.bro 
scripts/base/protocols/ssl/consts.bro @@ -166,15 +166,13 @@ scripts/base/init-default.bro scripts/base/protocols/ssl/mozilla-ca-list.bro scripts/base/protocols/http/__load__.bro scripts/base/protocols/http/main.bro + scripts/base/protocols/http/entities.bro scripts/base/protocols/http/utils.bro - scripts/base/protocols/http/file-analysis.bro - scripts/base/protocols/http/file-ident.bro - scripts/base/protocols/http/file-hash.bro - scripts/base/protocols/http/file-extract.bro + scripts/base/protocols/http/files.bro scripts/base/protocols/irc/__load__.bro scripts/base/protocols/irc/main.bro scripts/base/protocols/irc/dcc-send.bro - scripts/base/protocols/irc/file-analysis.bro + scripts/base/protocols/irc/files.bro scripts/base/protocols/modbus/__load__.bro scripts/base/protocols/modbus/consts.bro scripts/base/protocols/modbus/main.bro @@ -182,8 +180,7 @@ scripts/base/init-default.bro scripts/base/protocols/smtp/__load__.bro scripts/base/protocols/smtp/main.bro scripts/base/protocols/smtp/entities.bro - scripts/base/protocols/smtp/entities-excerpt.bro - scripts/base/protocols/smtp/file-analysis.bro + scripts/base/protocols/smtp/files.bro scripts/base/protocols/socks/__load__.bro scripts/base/protocols/socks/consts.bro scripts/base/protocols/socks/main.bro @@ -193,6 +190,10 @@ scripts/base/init-default.bro scripts/base/protocols/syslog/consts.bro scripts/base/protocols/syslog/main.bro scripts/base/protocols/tunnels/__load__.bro + scripts/base/files/hash/__load__.bro + scripts/base/files/hash/main.bro + scripts/base/files/extract/__load__.bro + scripts/base/files/extract/main.bro scripts/base/misc/find-checksum-offloading.bro scripts/policy/misc/loaded-scripts.bro -#close 2013-07-10-21-18-31 +#close 2013-07-23-05-48-10 diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.ftp/out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.ftp/out index 4463db6958..c810ce15e5 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.ftp/out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.ftp/out @@ -3,7 +3,8 @@ file #0, 0, 0 FILE_BOF_BUFFER The Nationa MIME_TYPE -text/x-pascal +application/octet-stream +FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #0, 16557, 0 [orig_h=141.142.228.5, orig_p=50737/tcp, resp_h=141.142.192.162, resp_p=38141/tcp] diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.irc/out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.irc/out index 36da7bdeed..fcd30b2253 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.irc/out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.irc/out @@ -4,6 +4,21 @@ FILE_BOF_BUFFER PK^C^D^T\0\0\0^H\0\xae MIME_TYPE application/zip +FILE_OVER_NEW_CONNECTION +FILE_NEW +file #1, 0, 0 +FILE_BOF_BUFFER +\0\0^Ex\0\0^J\xf0\0\0^P +MIME_TYPE +application/octet-stream +FILE_OVER_NEW_CONNECTION +FILE_STATE_REMOVE +file #1, 124, 0 +[orig_h=192.168.1.77, orig_p=57655/tcp, resp_h=209.197.168.151, resp_p=1024/tcp] +source: IRC_DATA +MD5: 35288fd50a74c7d675909ff83424d7a1 +SHA1: 8a98f177cb47e6bf771bf57c2f7e94c4b5e79ffa +SHA256: b24dde52b933a0d76e885ab418cb6d697b14a4e2fef45fce66e12ecc5a6a81aa FILE_STATE_REMOVE file #0, 42208, 0 [orig_h=192.168.1.77, orig_p=57655/tcp, resp_h=209.197.168.151, resp_p=1024/tcp] diff --git a/testing/btest/Baseline/scripts.base.frameworks.logging.writer-path-conflict/http.log b/testing/btest/Baseline/scripts.base.frameworks.logging.writer-path-conflict/http.log index 6b7bea88c9..8f9d553d9a 100644 --- 
a/testing/btest/Baseline/scripts.base.frameworks.logging.writer-path-conflict/http.log +++ b/testing/btest/Baseline/scripts.base.frameworks.logging.writer-path-conflict/http.log @@ -3,9 +3,9 @@ #empty_field (empty) #unset_field - #path http -#open 2013-05-21-21-11-23 -#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied mime_type md5 extracted_request_files extracted_response_files -#types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] string string vector[string] vector[string] +#open 2013-07-23-05-48-35 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied orig_fuids orig_mime_types resp_fuids resp_mime_types +#types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] vector[string] vector[string] vector[string] vector[string] 1300475168.784020 j4u32Pc5bif 141.142.220.118 48649 208.80.152.118 80 1 GET bits.wikimedia.org /skins-1.5/monobook/main.css http://www.wikipedia.org/ Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.15) Gecko/20110303 Ubuntu/10.04 (lucid) Firefox/3.6.15 0 0 304 Not Modified - - - (empty) - - - - - - - 1300475168.916018 VW0XPVINV8a 141.142.220.118 49997 208.80.152.3 80 1 GET upload.wikimedia.org /wikipedia/commons/6/63/Wikipedia-logo.png http://www.wikipedia.org/ Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.15) Gecko/20110303 Ubuntu/10.04 (lucid) Firefox/3.6.15 0 0 304 Not Modified - - - (empty) - - - - - - - 1300475168.916183 3PKsZ2Uye21 141.142.220.118 49996 208.80.152.3 80 1 GET upload.wikimedia.org /wikipedia/commons/thumb/b/bb/Wikipedia_wordmark.svg/174px-Wikipedia_wordmark.svg.png http://www.wikipedia.org/ Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.15) Gecko/20110303 Ubuntu/10.04 (lucid) Firefox/3.6.15 0 0 304 Not Modified - - - (empty) - - - - - - - @@ -20,4 +20,4 @@ 1300475169.014619 Tw8jXtpTGu6 141.142.220.118 50000 208.80.152.3 80 2 GET upload.wikimedia.org /wikipedia/commons/thumb/4/4a/Commons-logo.svg/35px-Commons-logo.svg.png http://www.wikipedia.org/ Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.15) Gecko/20110303 Ubuntu/10.04 (lucid) Firefox/3.6.15 0 0 304 Not Modified - - - (empty) - - - - - - - 1300475169.014593 P654jzLoe3a 141.142.220.118 49999 208.80.152.3 80 2 GET upload.wikimedia.org /wikipedia/commons/thumb/9/91/Wikiversity-logo.svg/35px-Wikiversity-logo.svg.png http://www.wikipedia.org/ Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.15) Gecko/20110303 Ubuntu/10.04 (lucid) Firefox/3.6.15 0 0 304 Not Modified - - - (empty) - - - - - - - 1300475169.014927 0Q4FH8sESw5 141.142.220.118 50001 208.80.152.3 80 2 GET upload.wikimedia.org /wikipedia/commons/thumb/7/75/Wikimedia_Community_Logo.svg/35px-Wikimedia_Community_Logo.svg.png http://www.wikipedia.org/ Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.15) Gecko/20110303 Ubuntu/10.04 (lucid) Firefox/3.6.15 0 0 304 Not Modified - - - (empty) - - - - - - - -#close 2013-05-21-21-11-23 +#close 2013-07-23-05-48-35 diff --git a/testing/btest/Baseline/scripts.base.protocols.ftp.gridftp/notice.log 
b/testing/btest/Baseline/scripts.base.protocols.ftp.gridftp/notice.log index 051f1c6266..04c80407f6 100644 --- a/testing/btest/Baseline/scripts.base.protocols.ftp.gridftp/notice.log +++ b/testing/btest/Baseline/scripts.base.protocols.ftp.gridftp/notice.log @@ -3,8 +3,8 @@ #empty_field (empty) #unset_field - #path notice -#open 2013-04-02-02-19-21 -#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p proto note msg sub src dst p n peer_descr actions suppress_for dropped remote_location.country_code remote_location.region remote_location.city remote_location.latitude remote_location.longitude -#types time string addr port addr port enum enum string string addr addr port count string table[enum] interval bool string string string double double -1348168976.558309 arKYeMETxOg 192.168.57.103 35391 192.168.57.101 55968 tcp GridFTP::Data_Channel GridFTP data channel over threshold 2 bytes - 192.168.57.103 192.168.57.101 55968 - bro Notice::ACTION_LOG 3600.000000 F - - - - - -#close 2013-04-02-02-19-21 +#open 2013-07-23-05-19-25 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p fuid file_mime_type file_desc proto note msg sub src dst p n peer_descr actions suppress_for dropped remote_location.country_code remote_location.region remote_location.city remote_location.latitude remote_location.longitude +#types time string addr port addr port string string string enum enum string string addr addr port count string table[enum] interval bool string string string double double +1348168976.558309 arKYeMETxOg 192.168.57.103 35391 192.168.57.101 55968 - - - tcp GridFTP::Data_Channel GridFTP data channel over threshold 2 bytes - 192.168.57.103 192.168.57.101 55968 - bro Notice::ACTION_LOG 3600.000000 F - - - - - +#close 2013-07-23-05-19-25 diff --git a/testing/btest/Baseline/scripts.base.protocols.smtp.mime/smtp_entities.log b/testing/btest/Baseline/scripts.base.protocols.smtp.mime/smtp_entities.log deleted file mode 100644 index 135c644855..0000000000 --- a/testing/btest/Baseline/scripts.base.protocols.smtp.mime/smtp_entities.log +++ /dev/null @@ -1,12 +0,0 @@ -#separator \x09 -#set_separator , -#empty_field (empty) -#unset_field - -#path smtp_entities -#open 2013-03-26-20-39-07 -#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth filename content_len mime_type md5 extraction_file excerpt -#types time string addr port addr port count string count string string string string -1254722770.692743 arKYeMETxOg 10.10.1.4 1470 74.53.140.153 25 1 - 79 text/plain 92bca2e6cdcde73647125da7dccbdd07 - (empty) -1254722770.692743 arKYeMETxOg 10.10.1.4 1470 74.53.140.153 25 1 - 1918 text/html - - (empty) -1254722770.692804 arKYeMETxOg 10.10.1.4 1470 74.53.140.153 25 1 NEWS.txt 10823 text/plain a968bb0f9f9d95835b2e74c845877e87 - (empty) -#close 2013-03-26-20-39-07 diff --git a/testing/btest/scripts/base/protocols/smtp/mime.test b/testing/btest/scripts/base/protocols/smtp/mime.test deleted file mode 100644 index 8e7a336987..0000000000 --- a/testing/btest/scripts/base/protocols/smtp/mime.test +++ /dev/null @@ -1,6 +0,0 @@ -# @TEST-EXEC: bro -r $TRACES/smtp.trace %INPUT -# @TEST-EXEC: btest-diff smtp_entities.log - -@load base/protocols/smtp - -redef SMTP::generate_md5=/text\/plain/; diff --git a/testing/scripts/file-analysis-test.bro b/testing/scripts/file-analysis-test.bro index cf2bbf2d59..8fe78b218e 100644 --- a/testing/scripts/file-analysis-test.bro +++ b/testing/scripts/file-analysis-test.bro @@ -1,7 +1,7 @@ global test_file_analysis_source: string = "" &redef; -global test_file_analyzers: 
set[Files::AnalyzerArgs]; +global test_file_analyzers: set[Files::Tag]; global test_get_file_name: function(f: fa_file): string = function(f: fa_file): string { return ""; } &redef; @@ -46,11 +46,11 @@ event file_new(f: fa_file) local filename: string = test_get_file_name(f); if ( filename != "" ) - Files::add_analyzer(f, [$tag=Files::ANALYZER_EXTRACT, - $extract_filename=filename]); - Files::add_analyzer(f, [$tag=Files::ANALYZER_DATA_EVENT, - $chunk_event=file_chunk, - $stream_event=file_stream]); + Files::add_analyzer(f, Files::ANALYZER_EXTRACT, + [$extract_filename=filename]); + Files::add_analyzer(f, Files::ANALYZER_DATA_EVENT, + [$chunk_event=file_chunk, + $stream_event=file_stream]); } if ( f?$bof_buffer ) @@ -106,7 +106,7 @@ event file_state_remove(f: fa_file) event bro_init() { - add test_file_analyzers[[$tag=Files::ANALYZER_MD5]]; - add test_file_analyzers[[$tag=Files::ANALYZER_SHA1]]; - add test_file_analyzers[[$tag=Files::ANALYZER_SHA256]]; + add test_file_analyzers[Files::ANALYZER_MD5]; + add test_file_analyzers[Files::ANALYZER_SHA1]; + add test_file_analyzers[Files::ANALYZER_SHA256]; } From 73eb87a41ef5d79f5f84d8aebe42ce9b61aadc5a Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Tue, 23 Jul 2013 14:16:39 -0500 Subject: [PATCH 093/118] Exec module changes/fixes. - Give Dir::monitor() a param for the polling interval, so different dirs can be monitored at different frequencies. - Fix race in Exec::run() when reading extra output files produced by a process -- it was possible for Exec::run() to return before all extra output files had been fully read. - Add test cases. --- scripts/base/utils/active-http.bro | 3 + scripts/base/utils/dir.bro | 34 +++++--- scripts/base/utils/exec.bro | 85 ++++++++++++------- .../bro..stdout | 5 ++ .../scripts.base.utils.dir/bro..stdout | 10 +++ .../scripts.base.utils.exec/bro..stdout | 7 ++ .../btest/scripts/base/utils/active-http.test | 25 ++++++ testing/btest/scripts/base/utils/dir.test | 58 +++++++++++++ testing/btest/scripts/base/utils/exec.test | 74 ++++++++++++++++ testing/scripts/httpd.py | 40 +++++++++ 10 files changed, 299 insertions(+), 42 deletions(-) create mode 100644 testing/btest/Baseline/scripts.base.utils.active-http/bro..stdout create mode 100644 testing/btest/Baseline/scripts.base.utils.dir/bro..stdout create mode 100644 testing/btest/Baseline/scripts.base.utils.exec/bro..stdout create mode 100644 testing/btest/scripts/base/utils/active-http.test create mode 100644 testing/btest/scripts/base/utils/dir.test create mode 100644 testing/btest/scripts/base/utils/exec.test create mode 100755 testing/scripts/httpd.py diff --git a/scripts/base/utils/active-http.bro b/scripts/base/utils/active-http.bro index 3f475a378b..eb9a212221 100644 --- a/scripts/base/utils/active-http.bro +++ b/scripts/base/utils/active-http.bro @@ -90,7 +90,10 @@ function request(req: Request): ActiveHTTP::Response { # If there is no response line then nothing else will work either. if ( ! (result?$files && headersfile in result$files) ) + { Reporter::error(fmt("There was a failure when requesting \"%s\" with ActiveHTTP.", req$url)); + return resp; + } local headers = result$files[headersfile]; for ( i in headers ) diff --git a/scripts/base/utils/dir.bro b/scripts/base/utils/dir.bro index b154fe000e..3329dc6306 100644 --- a/scripts/base/utils/dir.bro +++ b/scripts/base/utils/dir.bro @@ -5,6 +5,10 @@ module Dir; export { + ## The default interval this module checks for files in directories when + ## using the :bro:see:`Dir::monitor` function. 
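# Illustrative sketch only, not part of the patch; the directory paths and
# callback below are hypothetical.  With the poll_interval parameter that the
# commit message above describes (and that this hunk adds to Dir::monitor),
# different directories can be watched at different frequencies:
function on_new_file(fname: string)
	{
	print "new file", fname;
	}

event bro_init()
	{
	Dir::monitor("/tmp/spool", on_new_file, 5sec);
	Dir::monitor("/tmp/archive", on_new_file, 1min);
	}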
+ const polling_interval = 30sec &redef; + ## Register a directory to monitor with a callback that is called ## every time a previously unseen file is seen. If a file is deleted ## and seen to be gone, the file is available for being seen again in @@ -14,14 +18,15 @@ export { ## ## callback: Callback that gets executed with each file name ## that is found. Filenames are provided with the full path. - global monitor: function(dir: string, callback: function(fname: string)); - - ## The interval this module checks for files in directories when using - ## the :bro:see:`Dir::monitor` function. - const polling_interval = 30sec &redef; + ## + ## poll_interval: An interval at which to check for new files. + global monitor: function(dir: string, callback: function(fname: string), + poll_interval: interval &default=polling_interval); } -event Dir::monitor_ev(dir: string, last_files: set[string], callback: function(fname: string)) +event Dir::monitor_ev(dir: string, last_files: set[string], + callback: function(fname: string), + poll_interval: interval) { when ( local result = Exec::run([$cmd=fmt("ls -i \"%s/\"", str_shell_escape(dir))]) ) { @@ -32,7 +37,11 @@ event Dir::monitor_ev(dir: string, last_files: set[string], callback: function(f } local current_files: set[string] = set(); - local files = result$stdout; + local files: vector of string = vector(); + + if ( result?$stdout ) + files = result$stdout; + for ( i in files ) { local parts = split1(files[i], / /); @@ -40,13 +49,18 @@ event Dir::monitor_ev(dir: string, last_files: set[string], callback: function(f callback(build_path_compressed(dir, parts[2])); add current_files[parts[1]]; } - schedule polling_interval { Dir::monitor_ev(dir, current_files, callback) }; + + schedule poll_interval + { + Dir::monitor_ev(dir, current_files, callback, poll_interval) + }; } } -function monitor(dir: string, callback: function(fname: string)) +function monitor(dir: string, callback: function(fname: string), + poll_interval: interval &default=polling_interval) { - event Dir::monitor_ev(dir, set(), callback); + event Dir::monitor_ev(dir, set(), callback, poll_interval); } diff --git a/scripts/base/utils/exec.bro b/scripts/base/utils/exec.bro index f896a68064..4ffae29303 100644 --- a/scripts/base/utils/exec.bro +++ b/scripts/base/utils/exec.bro @@ -14,6 +14,8 @@ export { ## If additional files are required to be read in as part of the output ## of the command they can be defined here. read_files: set[string] &optional; + # The unique id for tracking executors. + uid: string &default=unique_id(""); }; type Result: record { @@ -44,14 +46,11 @@ export { const tmp_dir = "/tmp" &redef; } -redef record Command += { - # The unique id for tracking executors. - uid: string &optional; -}; +# Indexed by command uid. 
+global results: table[string] of Result; +global pending_commands: set[string]; +global pending_files: table[string] of set[string]; -global results: table[string] of Result = table(); -global finished_commands: set[string]; -global currently_tracked_files: set[string] = set(); type OneLine: record { s: string; is_stderr: bool; @@ -96,39 +95,63 @@ event Exec::file_line(description: Input::EventDescription, tpe: Input::Event, s result$files[track_file][|result$files[track_file]|] = s; } +event Input::end_of_data(name: string, source:string) + { + local parts = split1(name, /_/); + name = parts[1]; + + if ( name !in pending_commands || |parts| < 2 ) + return; + + local track_file = parts[2]; + + Input::remove(name); + + if ( name !in pending_files ) + delete pending_commands[name]; + else + { + delete pending_files[name][track_file]; + if ( |pending_files[name]| == 0 ) + delete pending_commands[name]; + system(fmt("rm \"%s\"", str_shell_escape(track_file))); + } + } + event InputRaw::process_finished(name: string, source:string, exit_code:count, signal_exit:bool) { + if ( name !in pending_commands ) + return; + + Input::remove(name); results[name]$exit_code = exit_code; results[name]$signal_exit = signal_exit; - Input::remove(name); - # Indicate to the "when" async watcher that this command is done. - add finished_commands[name]; - } - -event Exec::start_watching_file(uid: string, read_file: string) - { - Input::add_event([$source=fmt("%s", read_file), - $name=fmt("%s_%s", uid, read_file), - $reader=Input::READER_RAW, - $mode=Input::STREAM, - $want_record=F, - $fields=FileLine, - $ev=Exec::file_line]); + if ( name !in pending_files || |pending_files[name]| == 0 ) + # No extra files to read, command is done. + delete pending_commands[name]; + else + for ( read_file in pending_files[name] ) + Input::add_event([$source=fmt("%s", read_file), + $name=fmt("%s_%s", name, read_file), + $reader=Input::READER_RAW, + $want_record=F, + $fields=FileLine, + $ev=Exec::file_line]); } function run(cmd: Command): Result { - cmd$uid = unique_id(""); + add pending_commands[cmd$uid]; results[cmd$uid] = []; if ( cmd?$read_files ) { for ( read_file in cmd$read_files ) { - add currently_tracked_files[read_file]; - system(fmt("touch \"%s\" 2>/dev/null", str_shell_escape(read_file))); - schedule 1msec { Exec::start_watching_file(cmd$uid, read_file) }; + if ( cmd$uid !in pending_files ) + pending_files[cmd$uid] = set(); + add pending_files[cmd$uid][read_file]; } } @@ -144,9 +167,8 @@ function run(cmd: Command): Result $want_record=F, $config=config_strings]); - return when ( cmd$uid in finished_commands ) + return when ( cmd$uid !in pending_commands ) { - delete finished_commands[cmd$uid]; local result = results[cmd$uid]; delete results[cmd$uid]; return result; @@ -155,9 +177,8 @@ function run(cmd: Command): Result event bro_done() { - # We are punting here and just deleting any files that haven't been processed yet. - for ( fname in currently_tracked_files ) - { - system(fmt("rm \"%s\"", str_shell_escape(fname))); - } + # We are punting here and just deleting any unprocessed files. 
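# Illustrative sketch only, not part of the patch; the script name and output
# file are hypothetical.  Exec::run() is asynchronous and must be used inside
# a "when" statement, and with the race fix in this patch the files listed in
# $read_files are fully read before the result is delivered:
event bro_init()
	{
	when ( local res = Exec::run([$cmd="bash myscript.sh", $read_files=set("extra.out")]) )
		{
		print res$exit_code;

		if ( res?$stdout )
			print res$stdout;

		if ( res?$files && "extra.out" in res$files )
			print res$files["extra.out"];
		}
	}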
+ for ( uid in pending_files ) + for ( fname in pending_files[uid] ) + system(fmt("rm \"%s\"", str_shell_escape(fname))); } diff --git a/testing/btest/Baseline/scripts.base.utils.active-http/bro..stdout b/testing/btest/Baseline/scripts.base.utils.active-http/bro..stdout new file mode 100644 index 0000000000..0284eb19b3 --- /dev/null +++ b/testing/btest/Baseline/scripts.base.utils.active-http/bro..stdout @@ -0,0 +1,5 @@ +[code=200, msg=OK^M, body=It works!, headers={ +[Server] = 1.0, +[Content-type] = text/plain, +[Date] = July 22, 2013 +}] diff --git a/testing/btest/Baseline/scripts.base.utils.dir/bro..stdout b/testing/btest/Baseline/scripts.base.utils.dir/bro..stdout new file mode 100644 index 0000000000..c3103b7f64 --- /dev/null +++ b/testing/btest/Baseline/scripts.base.utils.dir/bro..stdout @@ -0,0 +1,10 @@ +new_file1, ../testdir/bye +new_file1, ../testdir/hi +new_file1, ../testdir/howsitgoing +new_file2, ../testdir/bye +new_file2, ../testdir/hi +new_file2, ../testdir/howsitgoing +new_file1, ../testdir/bye +new_file1, ../testdir/newone +new_file2, ../testdir/bye +new_file2, ../testdir/newone diff --git a/testing/btest/Baseline/scripts.base.utils.exec/bro..stdout b/testing/btest/Baseline/scripts.base.utils.exec/bro..stdout new file mode 100644 index 0000000000..5352d15d18 --- /dev/null +++ b/testing/btest/Baseline/scripts.base.utils.exec/bro..stdout @@ -0,0 +1,7 @@ +test1, [exit_code=0, signal_exit=F, stdout=[done, exit, stop], stderr=, files={ +[out1] = [insert text here, and here], +[out2] = [insert more text here, and there] +}] +test2, [exit_code=1, signal_exit=F, stdout=[here's something on stdout, some more stdout, last stdout], stderr=[and some stderr, more stderr, last stderr], files=] +test3, [exit_code=9, signal_exit=F, stdout=[FML], stderr=, files=] +test4, [exit_code=0, signal_exit=F, stdout=[hibye], stderr=, files=] diff --git a/testing/btest/scripts/base/utils/active-http.test b/testing/btest/scripts/base/utils/active-http.test new file mode 100644 index 0000000000..9ac762b9b7 --- /dev/null +++ b/testing/btest/scripts/base/utils/active-http.test @@ -0,0 +1,25 @@ +# @TEST-EXEC: btest-bg-run httpd python $SCRIPTS/httpd.py --max 1 +# @TEST-EXEC: sleep 3 +# @TEST-EXEC: btest-bg-run bro bro -b %INPUT +# @TEST-EXEC: btest-bg-wait 15 +# @TEST-EXEC: btest-diff bro/.stdout + +@load base/utils/active-http + +redef exit_only_after_terminate = T; + +event bro_init() + { + local req = ActiveHTTP::Request($url="localhost:32123"); + + when ( local resp = ActiveHTTP::request(req) ) + { + print resp; + terminate(); + } + timeout 1min + { + print "HTTP request timeout"; + terminate(); + } + } diff --git a/testing/btest/scripts/base/utils/dir.test b/testing/btest/scripts/base/utils/dir.test new file mode 100644 index 0000000000..44fee3860f --- /dev/null +++ b/testing/btest/scripts/base/utils/dir.test @@ -0,0 +1,58 @@ +# @TEST-EXEC: btest-bg-run bro bro -b ../dirtest.bro +# @TEST-EXEC: btest-bg-wait 10 +# @TEST-EXEC: TEST_DIFF_CANONIFIER=$SCRIPTS/diff-sort btest-diff bro/.stdout + +@TEST-START-FILE dirtest.bro + +@load base/utils/dir + +redef exit_only_after_terminate = T; + +global c: count = 0; + +function check_terminate_condition() + { + c += 1; + + if ( c == 10 ) + terminate(); + } + +function new_file1(fname: string) + { + print "new_file1", fname; + check_terminate_condition(); + } + +function new_file2(fname: string) + { + print "new_file2", fname; + check_terminate_condition(); + } + +event change_things() + { + system("touch ../testdir/newone"); + system("rm ../testdir/bye && touch 
../testdir/bye"); + } + +event bro_init() + { + Dir::monitor("../testdir", new_file1, .5sec); + Dir::monitor("../testdir", new_file2, 1sec); + schedule 1sec { change_things() }; + } + +@TEST-END-FILE + +@TEST-START-FILE testdir/hi +123 +@TEST-END-FILE + +@TEST-START-FILE testdir/howsitgoing +abc +@TEST-END-FILE + +@TEST-START-FILE testdir/bye +!@# +@TEST-END-FILE diff --git a/testing/btest/scripts/base/utils/exec.test b/testing/btest/scripts/base/utils/exec.test new file mode 100644 index 0000000000..8876f0f49b --- /dev/null +++ b/testing/btest/scripts/base/utils/exec.test @@ -0,0 +1,74 @@ +# @TEST-EXEC: btest-bg-run bro bro -b ../exectest.bro +# @TEST-EXEC: btest-bg-wait 10 +# @TEST-EXEC: TEST_DIFF_CANONIFIER=$SCRIPTS/diff-sort btest-diff bro/.stdout + +@TEST-START-FILE exectest.bro + +@load base/utils/exec + +redef exit_only_after_terminate = T; + +global c: count = 0; + +function check_exit_condition() + { + c += 1; + + if ( c == 4 ) + terminate(); + } + +function test_cmd(label: string, cmd: Exec::Command) + { + when ( local result = Exec::run(cmd) ) + { + print label, result; + check_exit_condition(); + } + } + +event bro_init() + { + test_cmd("test1", [$cmd="bash ../somescript.sh", + $read_files=set("out1", "out2")]); + test_cmd("test2", [$cmd="bash ../nofiles.sh"]); + test_cmd("test3", [$cmd="bash ../suicide.sh"]); + test_cmd("test4", [$cmd="bash ../stdin.sh", $stdin="hibye"]); + } + +@TEST-END-FILE + +@TEST-START-FILE somescript.sh +#! /usr/bin/env bash +echo "insert text here" > out1 +echo "and here" >> out1 +echo "insert more text here" > out2 +echo "and there" >> out2 +echo "done" +echo "exit" +echo "stop" +@TEST-END-FILE + +@TEST-START-FILE nofiles.sh +#! /usr/bin/env bash +echo "here's something on stdout" +echo "some more stdout" +echo "last stdout" +echo "and some stderr" 1>&2 +echo "more stderr" 1>&2 +echo "last stderr" 1>&2 +exit 1 +@TEST-END-FILE + +@TEST-START-FILE suicide.sh +#! /usr/bin/env bash +echo "FML" +kill -9 $$ +echo "nope" +@TEST-END-FILE + +@TEST-START-FILE stdin.sh +#! /usr/bin/env bash +read -r line +echo "$line" +@TEST-END-FILE diff --git a/testing/scripts/httpd.py b/testing/scripts/httpd.py new file mode 100755 index 0000000000..0732614bc2 --- /dev/null +++ b/testing/scripts/httpd.py @@ -0,0 +1,40 @@ +#! 
/usr/bin/env python + +import BaseHTTPServer + +class MyRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler): + + def do_GET(self): + self.send_response(200) + self.send_header("Content-type", "text/plain") + self.end_headers() + self.wfile.write("It works!") + + def version_string(self): + return "1.0" + + def date_time_string(self): + return "July 22, 2013" + + +if __name__ == "__main__": + from optparse import OptionParser + p = OptionParser() + p.add_option("-a", "--addr", type="string", default="localhost", + help=("listen on given address (numeric IP or host name), " + "an empty string (the default) means INADDR_ANY")) + p.add_option("-p", "--port", type="int", default=32123, + help="listen on given TCP port number") + p.add_option("-m", "--max", type="int", default=-1, + help="max number of requests to respond to, -1 means no max") + options, args = p.parse_args() + + httpd = BaseHTTPServer.HTTPServer((options.addr, options.port), + MyRequestHandler) + if options.max == -1: + httpd.serve_forever() + else: + served_count = 0 + while served_count != options.max: + httpd.handle_request() + served_count += 1 From 474107fe40c22dec977d4e9ee3dad0edcbc02344 Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Tue, 23 Jul 2013 17:16:57 -0700 Subject: [PATCH 094/118] Broifying the code. Also extending API documentation a bit more and fixing a memory leak. --- src/Func.cc | 4 +- src/H3.h | 4 +- src/OpaqueVal.cc | 159 ++-- src/OpaqueVal.h | 67 +- src/Type.cc | 1 + src/probabilistic/BitVector.cc | 777 ++++++++++-------- src/probabilistic/BitVector.h | 575 +++++++------ src/probabilistic/BloomFilter.cc | 229 +++--- src/probabilistic/BloomFilter.h | 229 ++++-- src/probabilistic/CounterVector.cc | 244 +++--- src/probabilistic/CounterVector.h | 208 ++--- src/probabilistic/Hasher.cc | 63 +- src/probabilistic/Hasher.h | 262 +++--- src/probabilistic/bloom-filter.bif | 122 +-- src/util.cc | 20 +- src/util.h | 8 +- .../btest/Baseline/bifs.bloomfilter/output | 6 + testing/btest/bifs/bloomfilter.bro | 2 +- 18 files changed, 1651 insertions(+), 1329 deletions(-) diff --git a/src/Func.cc b/src/Func.cc index a0d2299933..483699668f 100644 --- a/src/Func.cc +++ b/src/Func.cc @@ -560,7 +560,7 @@ void builtin_error(const char* msg, BroObj* arg) #include "reporter.bif.func_def" #include "strings.bif.func_def" -// TODO: Add a nicer mechanism to pull subdirectory bifs automatically. +// TODO: Add a nicer mechanism to pull in subdirectory bifs automatically. #include "probabilistic/bloom-filter.bif.h" void init_builtin_funcs() @@ -577,7 +577,7 @@ void init_builtin_funcs() #include "reporter.bif.func_init" #include "strings.bif.func_init" -// TODO: Add a nicer mechanism to pull subdirectory bifs automatically. +// TODO: Add a nicer mechanism to pull in subdirectory bifs automatically. 
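The bloom-filter.bif pulled in here is what exposes the probabilistic data structures to the script layer. For orientation, a rough sketch of that script-level interface follows; the bif names and signatures are assumed from the API as it was later released (the diffstat's testing/btest/bifs/bloomfilter.bro exercises them) and may not match this exact commit:

	# Hypothetical usage; function names assumed, not taken from this patch.
	event bro_init()
		{
		local bf1 = bloomfilter_basic_init(0.01, 1000);  # target FP rate, capacity
		local bf2 = bloomfilter_basic_init(0.01, 1000);

		bloomfilter_add(bf1, "foo");
		bloomfilter_add(bf2, "bar");

		# Lookups return a count; 0 means "definitely not in the set".
		print bloomfilter_lookup(bf1, "foo");
		print bloomfilter_lookup(bf1, "baz");

		# Filters built with identical parameters and element type can be merged.
		local both = bloomfilter_merge(bf1, bf2);
		print bloomfilter_lookup(both, "bar");
		}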
#include "probabilistic/bloom-filter.bif.init.cc" did_builtin_init = true; diff --git a/src/H3.h b/src/H3.h index 123dd6f374..8ea5848816 100644 --- a/src/H3.h +++ b/src/H3.h @@ -100,8 +100,8 @@ public: // loop optmized with Duff's Device register unsigned n = (size + 7) / 8; switch ( size % 8 ) { - case 0: do { result ^= byte_lookup[offset++][*p++]; - case 7: result ^= byte_lookup[offset++][*p++]; + case 0: do { result ^= byte_lookup[offset++][*p++]; + case 7: result ^= byte_lookup[offset++][*p++]; case 6: result ^= byte_lookup[offset++][*p++]; case 5: result ^= byte_lookup[offset++][*p++]; case 4: result ^= byte_lookup[offset++][*p++]; diff --git a/src/OpaqueVal.cc b/src/OpaqueVal.cc index 04032b2cfc..efdd890f70 100644 --- a/src/OpaqueVal.cc +++ b/src/OpaqueVal.cc @@ -1,5 +1,6 @@ -#include "OpaqueVal.h" +// See the file "COPYING" in the main distribution directory for copyright. +#include "OpaqueVal.h" #include "NetVar.h" #include "Reporter.h" #include "Serializer.h" @@ -518,87 +519,89 @@ bool EntropyVal::DoUnserialize(UnserialInfo* info) } BloomFilterVal::BloomFilterVal() - : OpaqueVal(bloomfilter_type), - type_(NULL), - hash_(NULL), - bloom_filter_(NULL) + : OpaqueVal(bloomfilter_type) { + type = 0; + hash = 0; + bloom_filter = 0; } BloomFilterVal::BloomFilterVal(OpaqueType* t) - : OpaqueVal(t), - type_(NULL), - hash_(NULL), - bloom_filter_(NULL) + : OpaqueVal(t) { + type = 0; + hash = 0; + bloom_filter = 0; } BloomFilterVal::BloomFilterVal(probabilistic::BloomFilter* bf) - : OpaqueVal(bloomfilter_type), - type_(NULL), - hash_(NULL), - bloom_filter_(bf) + : OpaqueVal(bloomfilter_type) { + type = 0; + hash = 0; + bloom_filter = bf; } -bool BloomFilterVal::Typify(BroType* type) - { - if ( type_ ) - return false; - type_ = type; - type_->Ref(); - TypeList* tl = new TypeList(type_); - tl->Append(type_); - hash_ = new CompositeHash(tl); - Unref(tl); - return true; - } +bool BloomFilterVal::Typify(BroType* arg_type) + { + if ( type ) + return false; + + type = arg_type; + type->Ref(); + + TypeList* tl = new TypeList(type); + tl->Append(type); + hash = new CompositeHash(tl); + Unref(tl); + + return true; + } BroType* BloomFilterVal::Type() const - { - return type_; - } + { + return type; + } void BloomFilterVal::Add(const Val* val) - { - HashKey* key = hash_->ComputeHash(val, 1); - bloom_filter_->Add(key->Hash()); - } + { + HashKey* key = hash->ComputeHash(val, 1); + bloom_filter->Add(key->Hash()); + delete key; + } size_t BloomFilterVal::Count(const Val* val) const - { - HashKey* key = hash_->ComputeHash(val, 1); - return bloom_filter_->Count(key->Hash()); - } + { + HashKey* key = hash->ComputeHash(val, 1); + size_t cnt = bloom_filter->Count(key->Hash()); + delete key; + return cnt; + } BloomFilterVal* BloomFilterVal::Merge(const BloomFilterVal* x, const BloomFilterVal* y) - { - if ( x->Type() != y->Type() ) - { - reporter->InternalError("cannot merge Bloom filters with different types"); - return NULL; - } + { + if ( ! 
same_type(x->Type(), y->Type()) ) + reporter->InternalError("cannot merge Bloom filters with different types"); - BloomFilterVal* result; - if ( (result = DoMerge(x, y)) ) - return result; - else if ( (result = DoMerge(x, y)) ) - return result; + BloomFilterVal* result; - reporter->InternalError("failed to merge Bloom filters"); - return NULL; - } + if ( (result = DoMerge(x, y)) ) + return result; + + else if ( (result = DoMerge(x, y)) ) + return result; + + reporter->InternalError("failed to merge Bloom filters"); + return 0; + } BloomFilterVal::~BloomFilterVal() - { - if ( type_ ) - Unref(type_); - if ( hash_ ) - delete hash_; - if ( bloom_filter_ ) - delete bloom_filter_; - } + { + Unref(type); + delete hash; + delete bloom_filter; + } IMPLEMENT_SERIAL(BloomFilterVal, SER_BLOOMFILTER_VAL); @@ -606,14 +609,16 @@ bool BloomFilterVal::DoSerialize(SerialInfo* info) const { DO_SERIALIZE(SER_BLOOMFILTER_VAL, OpaqueVal); - bool is_typed = type_ != NULL; - if ( ! SERIALIZE(is_typed) ) - return false; - if ( is_typed && ! type_->Serialize(info) ) - return false; + bool is_typed = (type != 0); - return bloom_filter_->Serialize(info); - } + if ( ! SERIALIZE(is_typed) ) + return false; + + if ( is_typed && ! type->Serialize(info) ) + return false; + + return bloom_filter->Serialize(info); + } bool BloomFilterVal::DoUnserialize(UnserialInfo* info) { @@ -621,15 +626,17 @@ bool BloomFilterVal::DoUnserialize(UnserialInfo* info) bool is_typed; if ( ! UNSERIALIZE(&is_typed) ) - return false; - if ( is_typed ) - { - BroType* type = BroType::Unserialize(info); - if ( ! Typify(type) ) - return false; - Unref(type); - } + return false; - bloom_filter_ = probabilistic::BloomFilter::Unserialize(info); - return bloom_filter_ != NULL; - } + if ( is_typed ) + { + BroType* type = BroType::Unserialize(info); + if ( ! Typify(type) ) + return false; + + Unref(type); + } + + bloom_filter = probabilistic::BloomFilter::Unserialize(info); + return bloom_filter != 0; + } diff --git a/src/OpaqueVal.h b/src/OpaqueVal.h index 5ccf73e11f..ea704cb70a 100644 --- a/src/OpaqueVal.h +++ b/src/OpaqueVal.h @@ -116,21 +116,19 @@ private: }; class BloomFilterVal : public OpaqueVal { - BloomFilterVal(const BloomFilterVal&); - BloomFilterVal& operator=(const BloomFilterVal&); public: - static BloomFilterVal* Merge(const BloomFilterVal* x, - const BloomFilterVal* y); - explicit BloomFilterVal(probabilistic::BloomFilter* bf); - ~BloomFilterVal(); + virtual ~BloomFilterVal(); - bool Typify(BroType* type); BroType* Type() const; + bool Typify(BroType* type); void Add(const Val* val); size_t Count(const Val* val) const; + static BloomFilterVal* Merge(const BloomFilterVal* x, + const BloomFilterVal* y); + protected: friend class Val; BloomFilterVal(); @@ -139,32 +137,35 @@ protected: DECLARE_SERIAL(BloomFilterVal); private: - template - static BloomFilterVal* DoMerge(const BloomFilterVal* x, - const BloomFilterVal* y) - { - if ( typeid(*x->bloom_filter_) != typeid(*y->bloom_filter_) ) - { - reporter->InternalError("cannot merge different Bloom filter types"); - return NULL; - } - if ( typeid(T) != typeid(*x->bloom_filter_) ) - return NULL; - const T* a = static_cast(x->bloom_filter_); - const T* b = static_cast(y->bloom_filter_); - BloomFilterVal* merged = new BloomFilterVal(T::Merge(a, b)); - assert(merged); - if ( ! merged->Typify(x->Type()) ) - { - reporter->InternalError("failed to set type on merged Bloom filter"); - return NULL; - } - return merged; - } + // Disable. 
+ BloomFilterVal(const BloomFilterVal&); + BloomFilterVal& operator=(const BloomFilterVal&); - BroType* type_; - CompositeHash* hash_; - probabilistic::BloomFilter* bloom_filter_; -}; + template + static BloomFilterVal* DoMerge(const BloomFilterVal* x, + const BloomFilterVal* y) + { + if ( typeid(*x->bloom_filter) != typeid(*y->bloom_filter) ) + reporter->InternalError("cannot merge different Bloom filter types"); + + if ( typeid(T) != typeid(*x->bloom_filter) ) + return 0; + + const T* a = static_cast(x->bloom_filter); + const T* b = static_cast(y->bloom_filter); + + BloomFilterVal* merged = new BloomFilterVal(T::Merge(a, b)); + assert(merged); + + if ( ! merged->Typify(x->Type()) ) + reporter->InternalError("failed to set type on merged Bloom filter"); + + return merged; + } + + BroType* type; + CompositeHash* hash; + probabilistic::BloomFilter* bloom_filter; + }; #endif diff --git a/src/Type.cc b/src/Type.cc index 57d9d0e6e5..563bc5afbd 100644 --- a/src/Type.cc +++ b/src/Type.cc @@ -1321,6 +1321,7 @@ bool OpaqueType::DoUnserialize(UnserialInfo* info) const char* n; if ( ! UNSERIALIZE_STR(&n, 0) ) return false; + name = n; delete [] n; diff --git a/src/probabilistic/BitVector.cc b/src/probabilistic/BitVector.cc index 67714fe7d0..98f008b24b 100644 --- a/src/probabilistic/BitVector.cc +++ b/src/probabilistic/BitVector.cc @@ -1,3 +1,5 @@ +// See the file "COPYING" in the main distribution directory for copyright. + #include "BitVector.h" #include @@ -8,505 +10,558 @@ using namespace probabilistic; BitVector::size_type BitVector::npos = static_cast(-1); BitVector::block_type BitVector::bits_per_block = - std::numeric_limits::digits; + std::numeric_limits::digits; namespace { uint8_t count_table[] = { - 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, - 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, - 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, - 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, - 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, - 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, - 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, - 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 2, 3, 3, 4, 3, 4, 4, 5, - 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 3, - 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, - 6, 7, 6, 7, 7, 8 + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, + 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, + 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, + 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, + 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, + 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, + 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, + 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 2, 3, 3, 4, 3, 4, 4, 5, + 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 3, + 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, + 6, 7, 6, 7, 7, 8 }; } // namespace BitVector::Reference::Reference(block_type& block, block_type i) - : block_(block), - mask_(block_type(1) << i) - { - assert(i < bits_per_block); - } + : block(block), mask((block_type(1) << i)) + { + assert(i < bits_per_block); + 
} BitVector::Reference& BitVector::Reference::Flip() - { - block_ ^= mask_; - return *this; - } + { + block ^= mask; + return *this; + } BitVector::Reference::operator bool() const - { - return (block_ & mask_) != 0; - } + { + return (block & mask) != 0; + } bool BitVector::Reference::operator~() const - { - return (block_ & mask_) == 0; - } + { + return (block & mask) == 0; + } BitVector::Reference& BitVector::Reference::operator=(bool x) - { - x ? block_ |= mask_ : block_ &= ~mask_; - return *this; - } + { + if ( x ) + block |= mask; + else + block &= ~mask; -BitVector::Reference& BitVector::Reference::operator=(Reference const& other) - { - other ? block_ |= mask_ : block_ &= ~mask_; - return *this; - } + return *this; + } + +BitVector::Reference& BitVector::Reference::operator=(const Reference& other) + { + if ( other ) + block |= mask; + else + block &= ~mask; + + return *this; + } BitVector::Reference& BitVector::Reference::operator|=(bool x) - { - if (x) - block_ |= mask_; - return *this; - } + { + if ( x ) + block |= mask; + + return *this; + } BitVector::Reference& BitVector::Reference::operator&=(bool x) - { - if (! x) - block_ &= ~mask_; - return *this; - } + { + if ( ! x ) + block &= ~mask; + + return *this; + } BitVector::Reference& BitVector::Reference::operator^=(bool x) - { - if (x) - block_ ^= mask_; - return *this; - } + { + if ( x ) + block ^= mask; + + return *this; + } BitVector::Reference& BitVector::Reference::operator-=(bool x) - { - if (x) - block_ &= ~mask_; - return *this; - } + { + if ( x ) + block &= ~mask; + return *this; + } -BitVector::BitVector() : num_bits_(0) { } +BitVector::BitVector() + { + num_bits = 0; + } BitVector::BitVector(size_type size, bool value) - : bits_(bits_to_blocks(size), value ? ~block_type(0) : 0), - num_bits_(size) -{ } + : bits(bits_to_blocks(size), value ? 
~block_type(0) : 0) + { + num_bits = size; + } BitVector::BitVector(BitVector const& other) - : bits_(other.bits_), - num_bits_(other.num_bits_) -{ } + : bits(other.bits) + { + num_bits = other.num_bits; + } BitVector BitVector::operator~() const - { - BitVector b(*this); - b.Flip(); - return b; - } + { + BitVector b(*this); + b.Flip(); + return b; + } BitVector& BitVector::operator=(BitVector const& other) - { - bits_ = other.bits_; - return *this; - } + { + bits = other.bits; + return *this; + } BitVector BitVector::operator<<(size_type n) const - { - BitVector b(*this); - return b <<= n; - } + { + BitVector b(*this); + return b <<= n; + } BitVector BitVector::operator>>(size_type n) const - { - BitVector b(*this); - return b >>= n; - } + { + BitVector b(*this); + return b >>= n; + } BitVector& BitVector::operator<<=(size_type n) - { - if (n >= num_bits_) - return Reset(); + { + if ( n >= num_bits ) + return Reset(); - if (n > 0) - { - size_type last = Blocks() - 1; - size_type div = n / bits_per_block; - block_type r = bit_index(n); - block_type* b = &bits_[0]; - assert(Blocks() >= 1); - assert(div <= last); + if ( n > 0 ) + { + size_type last = Blocks() - 1; + size_type div = n / bits_per_block; + block_type r = bit_index(n); + block_type* b = &bits[0]; - if (r != 0) - { - for (size_type i = last - div; i > 0; --i) - b[i + div] = (b[i] << r) | (b[i - 1] >> (bits_per_block - r)); - b[div] = b[0] << r; - } - else - { - for (size_type i = last-div; i > 0; --i) - b[i + div] = b[i]; - b[div] = b[0]; - } + assert(Blocks() >= 1); + assert(div <= last); - std::fill_n(b, div, block_type(0)); - zero_unused_bits(); - } + if ( r != 0 ) + { + for ( size_type i = last - div; i > 0; --i ) + b[i + div] = (b[i] << r) | (b[i - 1] >> (bits_per_block - r)); - return *this; - } + b[div] = b[0] << r; + } + + else + { + for (size_type i = last-div; i > 0; --i) + b[i + div] = b[i]; + + b[div] = b[0]; + } + + std::fill_n(b, div, block_type(0)); + zero_unused_bits(); + } + + return *this; + } BitVector& BitVector::operator>>=(size_type n) - { - if (n >= num_bits_) - return Reset(); + { + if ( n >= num_bits ) + return Reset(); - if (n > 0) - { - size_type last = Blocks() - 1; - size_type div = n / bits_per_block; - block_type r = bit_index(n); - block_type* b = &bits_[0]; - assert(Blocks() >= 1); - assert(div <= last); + if ( n > 0 ) + { + size_type last = Blocks() - 1; + size_type div = n / bits_per_block; + block_type r = bit_index(n); + block_type* b = &bits[0]; - if (r != 0) - { - for (size_type i = last - div; i > 0; --i) - b[i - div] = (b[i] >> r) | (b[i + 1] << (bits_per_block - r)); - b[last - div] = b[last] >> r; - } - else - { - for (size_type i = div; i <= last; ++i) - b[i-div] = b[i]; - } + assert(Blocks() >= 1); + assert(div <= last); - std::fill_n(b + (Blocks() - div), div, block_type(0)); - } - return *this; - } + if ( r != 0 ) + { + for (size_type i = last - div; i > 0; --i) + b[i - div] = (b[i] >> r) | (b[i + 1] << (bits_per_block - r)); + + b[last - div] = b[last] >> r; + } + + else + { + for (size_type i = div; i <= last; ++i) + b[i-div] = b[i]; + } + + std::fill_n(b + (Blocks() - div), div, block_type(0)); + } + + return *this; + } BitVector& BitVector::operator&=(BitVector const& other) - { - assert(Size() >= other.Size()); - for (size_type i = 0; i < Blocks(); ++i) - bits_[i] &= other.bits_[i]; - return *this; - } + { + assert(Size() >= other.Size()); + + for ( size_type i = 0; i < Blocks(); ++i ) + bits[i] &= other.bits[i]; + + return *this; + } BitVector& BitVector::operator|=(BitVector 
const& other) - { - assert(Size() >= other.Size()); - for (size_type i = 0; i < Blocks(); ++i) - bits_[i] |= other.bits_[i]; - return *this; - } + { + assert(Size() >= other.Size()); + + for ( size_type i = 0; i < Blocks(); ++i ) + bits[i] |= other.bits[i]; + + return *this; + } BitVector& BitVector::operator^=(BitVector const& other) - { - assert(Size() >= other.Size()); - for (size_type i = 0; i < Blocks(); ++i) - bits_[i] ^= other.bits_[i]; - return *this; - } + { + assert(Size() >= other.Size()); + + for ( size_type i = 0; i < Blocks(); ++i ) + bits[i] ^= other.bits[i]; + + return *this; + } BitVector& BitVector::operator-=(BitVector const& other) - { - assert(Size() >= other.Size()); - for (size_type i = 0; i < Blocks(); ++i) - bits_[i] &= ~other.bits_[i]; - return *this; - } + { + assert(Size() >= other.Size()); + + for ( size_type i = 0; i < Blocks(); ++i ) + bits[i] &= ~other.bits[i]; + + return *this; + } namespace probabilistic { BitVector operator&(BitVector const& x, BitVector const& y) - { - BitVector b(x); - return b &= y; - } + { + BitVector b(x); + return b &= y; + } BitVector operator|(BitVector const& x, BitVector const& y) - { - BitVector b(x); - return b |= y; - } + { + BitVector b(x); + return b |= y; + } BitVector operator^(BitVector const& x, BitVector const& y) - { - BitVector b(x); - return b ^= y; - } + { + BitVector b(x); + return b ^= y; + } BitVector operator-(BitVector const& x, BitVector const& y) - { - BitVector b(x); - return b -= y; - } + { + BitVector b(x); + return b -= y; + } bool operator==(BitVector const& x, BitVector const& y) - { - return x.num_bits_ == y.num_bits_ && x.bits_ == y.bits_; - } + { + return x.num_bits == y.num_bits && x.bits == y.bits; + } bool operator!=(BitVector const& x, BitVector const& y) - { - return ! (x == y); - } + { + return ! (x == y); + } bool operator<(BitVector const& x, BitVector const& y) - { - assert(x.Size() == y.Size()); - for (BitVector::size_type r = x.Blocks(); r > 0; --r) - { - BitVector::size_type i = r - 1; - if (x.bits_[i] < y.bits_[i]) - return true; - else if (x.bits_[i] > y.bits_[i]) - return false; - } - return false; - } + { + assert(x.Size() == y.Size()); + + for ( BitVector::size_type r = x.Blocks(); r > 0; --r ) + { + BitVector::size_type i = r - 1; + + if ( x.bits[i] < y.bits[i] ) + return true; + + else if ( x.bits[i] > y.bits[i] ) + return false; + + } + + return false; + } } void BitVector::Resize(size_type n, bool value) - { - size_type old = Blocks(); - size_type required = bits_to_blocks(n); - block_type block_value = value ? ~block_type(0) : block_type(0); + { + size_type old = Blocks(); + size_type required = bits_to_blocks(n); + block_type block_value = value ? ~block_type(0) : block_type(0); - if (required != old) - bits_.resize(required, block_value); + if ( required != old ) + bits.resize(required, block_value); - if (value && (n > num_bits_) && extra_bits()) - bits_[old - 1] |= (block_value << extra_bits()); + if ( value && (n > num_bits) && extra_bits() ) + bits[old - 1] |= (block_value << extra_bits()); - num_bits_ = n; - zero_unused_bits(); - } + num_bits = n; + zero_unused_bits(); + } void BitVector::Clear() - { - bits_.clear(); - num_bits_ = 0; - } + { + bits.clear(); + num_bits = 0; + } void BitVector::PushBack(bool bit) - { - size_type s = Size(); - Resize(s + 1); - Set(s, bit); - } + { + size_type s = Size(); + Resize(s + 1); + Set(s, bit); + } void BitVector::Append(block_type block) - { - size_type excess = extra_bits(); - if (excess) - { - assert(! 
Empty()); - bits_.push_back(block >> (bits_per_block - excess)); - bits_[Blocks() - 2] |= (block << excess); - } - else - { - bits_.push_back(block); - } - num_bits_ += bits_per_block; - } + { + size_type excess = extra_bits(); + + if ( excess ) + { + assert(! Empty()); + bits.push_back(block >> (bits_per_block - excess)); + bits[Blocks() - 2] |= (block << excess); + } + + else + { + bits.push_back(block); + } + + num_bits += bits_per_block; + } BitVector& BitVector::Set(size_type i, bool bit) - { - assert(i < num_bits_); - if (bit) - bits_[block_index(i)] |= bit_mask(i); - else - Reset(i); - return *this; - } + { + assert(i < num_bits); + + if ( bit ) + bits[block_index(i)] |= bit_mask(i); + else + Reset(i); + + return *this; + } BitVector& BitVector::Set() - { - std::fill(bits_.begin(), bits_.end(), ~block_type(0)); - zero_unused_bits(); - return *this; - } + { + std::fill(bits.begin(), bits.end(), ~block_type(0)); + zero_unused_bits(); + return *this; + } BitVector& BitVector::Reset(size_type i) - { - assert(i < num_bits_); - bits_[block_index(i)] &= ~bit_mask(i); - return *this; - } + { + assert(i < num_bits); + bits[block_index(i)] &= ~bit_mask(i); + return *this; + } BitVector& BitVector::Reset() - { - std::fill(bits_.begin(), bits_.end(), block_type(0)); - return *this; - } + { + std::fill(bits.begin(), bits.end(), block_type(0)); + return *this; + } BitVector& BitVector::Flip(size_type i) - { - assert(i < num_bits_); - bits_[block_index(i)] ^= bit_mask(i); - return *this; - } + { + assert(i < num_bits); + bits[block_index(i)] ^= bit_mask(i); + return *this; + } BitVector& BitVector::Flip() - { - for (size_type i = 0; i < Blocks(); ++i) - bits_[i] = ~bits_[i]; - zero_unused_bits(); - return *this; - } + { + for (size_type i = 0; i < Blocks(); ++i) + bits[i] = ~bits[i]; + + zero_unused_bits(); + return *this; + } bool BitVector::operator[](size_type i) const - { - assert(i < num_bits_); - return (bits_[block_index(i)] & bit_mask(i)) != 0; - } + { + assert(i < num_bits); + return (bits[block_index(i)] & bit_mask(i)) != 0; + } BitVector::Reference BitVector::operator[](size_type i) - { - assert(i < num_bits_); - return Reference(bits_[block_index(i)], bit_index(i)); - } + { + assert(i < num_bits); + return Reference(bits[block_index(i)], bit_index(i)); + } BitVector::size_type BitVector::Count() const - { - std::vector::const_iterator first = bits_.begin(); - size_t n = 0; - size_type length = Blocks(); - while (length) - { - block_type block = *first; - while (block) - { - // TODO: use __popcnt if available. - n += count_table[block & ((1u << 8) - 1)]; - block >>= 8; - } - ++first; - --length; - } - return n; - } + { + std::vector::const_iterator first = bits.begin(); + size_t n = 0; + size_type length = Blocks(); + + while ( length ) + { + block_type block = *first; + + while ( block ) + { + // TODO: use _popcnt if available. 
+ n += count_table[block & ((1u << 8) - 1)]; + block >>= 8; + } + + ++first; + --length; + } + + return n; + } BitVector::size_type BitVector::Blocks() const - { - return bits_.size(); - } + { + return bits.size(); + } BitVector::size_type BitVector::Size() const - { - return num_bits_; - } + { + return num_bits; + } bool BitVector::Empty() const - { - return bits_.empty(); - } + { + return bits.empty(); + } BitVector::size_type BitVector::FindFirst() const - { - return find_from(0); - } + { + return find_from(0); + } BitVector::size_type BitVector::FindNext(size_type i) const - { - if (i >= (Size() - 1) || Size() == 0) - return npos; - ++i; - size_type bi = block_index(i); - block_type block = bits_[bi] & (~block_type(0) << bit_index(i)); - return block ? bi * bits_per_block + lowest_bit(block) : find_from(bi + 1); - } + { + if ( i >= (Size() - 1) || Size() == 0 ) + return npos; + + ++i; + size_type bi = block_index(i); + block_type block = bits[bi] & (~block_type(0) << bit_index(i)); + return block ? bi * bits_per_block + lowest_bit(block) : find_from(bi + 1); + } BitVector::size_type BitVector::lowest_bit(block_type block) - { - block_type x = block - (block & (block - 1)); - size_type log = 0; - while (x >>= 1) - ++log; - return log; - } + { + block_type x = block - (block & (block - 1)); + size_type log = 0; + + while (x >>= 1) + ++log; + + return log; + } BitVector::block_type BitVector::extra_bits() const - { - return bit_index(Size()); - } + { + return bit_index(Size()); + } void BitVector::zero_unused_bits() - { - if (extra_bits()) - bits_.back() &= ~(~block_type(0) << extra_bits()); - } + { + if ( extra_bits() ) + bits.back() &= ~(~block_type(0) << extra_bits()); + } BitVector::size_type BitVector::find_from(size_type i) const - { - while (i < Blocks() && bits_[i] == 0) - ++i; - if (i >= Blocks()) - return npos; - return i * bits_per_block + lowest_bit(bits_[i]); - } + { + while (i < Blocks() && bits[i] == 0) + ++i; + + if ( i >= Blocks() ) + return npos; + + return i * bits_per_block + lowest_bit(bits[i]); + } bool BitVector::Serialize(SerialInfo* info) const - { - return SerialObj::Serialize(info); - } + { + return SerialObj::Serialize(info); + } BitVector* BitVector::Unserialize(UnserialInfo* info) - { - return reinterpret_cast( - SerialObj::Unserialize(info, SER_BITVECTOR)); - } + { + return reinterpret_cast(SerialObj::Unserialize(info, SER_BITVECTOR)); + } IMPLEMENT_SERIAL(BitVector, SER_BITVECTOR); bool BitVector::DoSerialize(SerialInfo* info) const - { - DO_SERIALIZE(SER_BITVECTOR, SerialObj); + { + DO_SERIALIZE(SER_BITVECTOR, SerialObj); - if ( ! SERIALIZE(static_cast(bits_.size())) ) - return false; + if ( ! SERIALIZE(static_cast(bits.size())) ) + return false; - for ( size_t i = 0; i < bits_.size(); ++i ) - if ( ! SERIALIZE(static_cast(bits_[i])) ) - return false; + for ( size_t i = 0; i < bits.size(); ++i ) + if ( ! SERIALIZE(static_cast(bits[i])) ) + return false; - return SERIALIZE(static_cast(num_bits_)); - } + return SERIALIZE(static_cast(num_bits)); + } bool BitVector::DoUnserialize(UnserialInfo* info) - { - DO_UNSERIALIZE(SerialObj); + { + DO_UNSERIALIZE(SerialObj); - uint64 size; - if ( ! UNSERIALIZE(&size) ) - return false; + uint64 size; + if ( ! UNSERIALIZE(&size) ) + return false; - bits_.resize(static_cast(size)); - uint64 block; - for ( size_t i = 0; i < bits_.size(); ++i ) - { - if ( ! UNSERIALIZE(&block) ) - return false; - bits_[i] = static_cast(block); - } + bits.resize(static_cast(size)); - uint64 num_bits; - if ( ! 
UNSERIALIZE(&num_bits) ) - return false; - num_bits_ = static_cast(num_bits); + for ( size_t i = 0; i < bits.size(); ++i ) + { + uint64 block; + if ( ! UNSERIALIZE(&block) ) + return false; - return true; - } + bits[i] = static_cast(block); + } + + uint64 num_bits; + if ( ! UNSERIALIZE(&num_bits) ) + return false; + + num_bits = static_cast(num_bits); + + return true; + } diff --git a/src/probabilistic/BitVector.h b/src/probabilistic/BitVector.h index 8832c24cbe..9eefe1b633 100644 --- a/src/probabilistic/BitVector.h +++ b/src/probabilistic/BitVector.h @@ -1,8 +1,11 @@ -#ifndef BitVector_h -#define BitVector_h +// See the file "COPYING" in the main distribution directory for copyright. + +#ifndef PROBABILISTIC_BITVECTOR_H +#define PROBABILISTIC_BITVECTOR_H #include #include + #include "SerialObj.h" namespace probabilistic { @@ -12,322 +15,348 @@ namespace probabilistic { */ class BitVector : public SerialObj { public: - typedef size_t block_type; - typedef size_t size_type; - static size_type npos; - static block_type bits_per_block; + typedef size_t block_type; + typedef size_t size_type; + typedef bool const_reference; -public: - /** - * An lvalue proxy for single bits. - */ - class Reference { - friend class BitVector; - Reference(block_type& block, block_type i); + static size_type npos; + static block_type bits_per_block; - public: - Reference& Flip(); - operator bool() const; - bool operator~() const; - Reference& operator=(bool x); - Reference& operator=(Reference const& other); - Reference& operator|=(bool x); - Reference& operator&=(bool x); - Reference& operator^=(bool x); - Reference& operator-=(bool x); + /** + * An lvalue proxy for individual bits. + */ + class Reference { + public: + /** + * Inverts the bits' values. + */ + Reference& Flip(); - private: - void operator&(); - block_type& block_; - block_type const mask_; - }; + operator bool() const; + bool operator~() const; + Reference& operator=(bool x); + Reference& operator=(const Reference& other); + Reference& operator|=(bool x); + Reference& operator&=(bool x); + Reference& operator^=(bool x); + Reference& operator-=(bool x); - typedef bool const_reference; + private: + friend class BitVector; - /** - * Default-constructs an empty bit vector. - */ - BitVector(); + Reference(block_type& block, block_type i); + void operator&(); - /** - * Constructs a bit vector of a given size. - * @param size The number of bits. - * @param value The value for each bit. - */ - explicit BitVector(size_type size, bool value = false); + block_type& block; + const block_type mask; + }; - /** - * Constructs a bit vector from a sequence of blocks. - */ - template - BitVector(InputIterator first, InputIterator last) - { - bits_.insert(bits_.end(), first, last); - num_bits_ = bits_.size() * bits_per_block; - } + /** + * Default-constructs an empty bit vector. + */ + BitVector(); - /** - * Copy-constructs a bit vector. - * @param other The bit vector to copy. - */ - BitVector(const BitVector& other); + /** + * Constructs a bit vector of a given size. + * @param size The number of bits. + * @param value The value for each bit. + */ + explicit BitVector(size_type size, bool value = false); - /** - * Assigns another bit vector to this instance. - * @param other The RHS of the assignment. - */ - BitVector& operator=(const BitVector& other); + /** + * Constructs a bit vector from a sequence of blocks. + * + * @param first Start of range + * @param last End of range. 
+ * + */ + template + BitVector(InputIterator first, InputIterator last) + { + bits.insert(bits.end(), first, last); + num_bits = bits.size() * bits_per_block; + } - // - // Bitwise operations - // - BitVector operator~() const; - BitVector operator<<(size_type n) const; - BitVector operator>>(size_type n) const; - BitVector& operator<<=(size_type n); - BitVector& operator>>=(size_type n); - BitVector& operator&=(BitVector const& other); - BitVector& operator|=(BitVector const& other); - BitVector& operator^=(BitVector const& other); - BitVector& operator-=(BitVector const& other); - friend BitVector operator&(BitVector const& x, BitVector const& y); - friend BitVector operator|(BitVector const& x, BitVector const& y); - friend BitVector operator^(BitVector const& x, BitVector const& y); - friend BitVector operator-(BitVector const& x, BitVector const& y); + /** + * Copy-constructs a bit vector. + * @param other The bit vector to copy. + */ + BitVector(const BitVector& other); - // - // Relational operators - // - friend bool operator==(BitVector const& x, BitVector const& y); - friend bool operator!=(BitVector const& x, BitVector const& y); - friend bool operator<(BitVector const& x, BitVector const& y); + /** + * Assigns another bit vector to this instance. + * @param other The RHS of the assignment. + */ + BitVector& operator=(const BitVector& other); - // - // Basic operations - // - /** Appends the bits in a sequence of values. - * @tparam Iterator A forward iterator. - * @param first An iterator pointing to the first element of the sequence. - * @param last An iterator pointing to one past the last element of the - * sequence. - */ - template - void Append(ForwardIterator first, ForwardIterator last) - { - if (first == last) - return; + // + // Bitwise operations. + // + BitVector operator~() const; + BitVector operator<<(size_type n) const; + BitVector operator>>(size_type n) const; + BitVector& operator<<=(size_type n); + BitVector& operator>>=(size_type n); + BitVector& operator&=(BitVector const& other); + BitVector& operator|=(BitVector const& other); + BitVector& operator^=(BitVector const& other); + BitVector& operator-=(BitVector const& other); + friend BitVector operator&(BitVector const& x, BitVector const& y); + friend BitVector operator|(BitVector const& x, BitVector const& y); + friend BitVector operator^(BitVector const& x, BitVector const& y); + friend BitVector operator-(BitVector const& x, BitVector const& y); - block_type excess = extra_bits(); - typename std::iterator_traits::difference_type delta = - std::distance(first, last); + // + // Relational operators + // + friend bool operator==(BitVector const& x, BitVector const& y); + friend bool operator!=(BitVector const& x, BitVector const& y); + friend bool operator<(BitVector const& x, BitVector const& y); - bits_.reserve(Blocks() + delta); - if (excess == 0) - { - bits_.back() |= (*first << excess); - do - { - block_type b = *first++ >> (bits_per_block - excess); - bits_.push_back(b | (first == last ? 0 : *first << excess)); - } while (first != last); - } - else - { - bits_.insert(bits_.end(), first, last); - } - num_bits_ += bits_per_block * delta; - } + // + // Basic operations + // - /** - * Appends the bits in a given block. - * @param block The block containing bits to append. - */ - void Append(block_type block); + /** Appends the bits in a sequence of values. + * @tparam Iterator A forward iterator. + * @param first An iterator pointing to the first element of the sequence. 
+ * @param last An iterator pointing to one past the last element of the + * sequence. + */ + template + void Append(ForwardIterator first, ForwardIterator last) + { + if ( first == last ) + return; - /** Appends a single bit to the end of the bit vector. - * @param bit The value of the bit. - */ - void PushBack(bool bit); + block_type excess = extra_bits(); + typename std::iterator_traits::difference_type delta = + std::distance(first, last); - /** - * Clears all bits in the bitvector. - */ - void Clear(); + bits.reserve(Blocks() + delta); - /** - * Resizes the bit vector to a new number of bits. - * @param n The new number of bits of the bit vector. - * @param value The bit value of new values, if the vector expands. - */ - void Resize(size_type n, bool value = false); + if ( excess == 0 ) + { + bits.back() |= (*first << excess); - /** - * Sets a bit at a specific position to a given value. - * @param i The bit position. - * @param bit The value assigned to position *i*. - * @return A reference to the bit vector instance. - */ - BitVector& Set(size_type i, bool bit = true); + do { + block_type b = *first++ >> (bits_per_block - excess); + bits.push_back(b | (first == last ? 0 : *first << excess)); + } while (first != last); - /** - * Sets all bits to 1. - * @return A reference to the bit vector instance. - */ - BitVector& Set(); + } - /** - * Resets a bit at a specific position, i.e., sets it to 0. - * @param i The bit position. - * @return A reference to the bit vector instance. - */ - BitVector& Reset(size_type i); + else + bits.insert(bits.end(), first, last); - /** - * Sets all bits to 0. - * @return A reference to the bit vector instance. - */ - BitVector& Reset(); + num_bits += bits_per_block * delta; + } - /** - * Toggles/flips a bit at a specific position. - * @param i The bit position. - * @return A reference to the bit vector instance. - */ - BitVector& Flip(size_type i); + /** + * Appends the bits in a given block. + * @param block The block containing bits to append. + */ + void Append(block_type block); - /** - * Computes the complement. - * @return A reference to the bit vector instance. - */ - BitVector& Flip(); + /** Appends a single bit to the end of the bit vector. + * @param bit The value of the bit. + */ + void PushBack(bool bit); - /** Retrieves a single bit. - * @param i The bit position. - * @return A mutable reference to the bit at position *i*. - */ - Reference operator[](size_type i); + /** + * Clears all bits in the bitvector. + */ + void Clear(); - /** - * Retrieves a single bit. - * @param i The bit position. - * @return A const-reference to the bit at position *i*. - */ - const_reference operator[](size_type i) const; + /** + * Resizes the bit vector to a new number of bits. + * @param n The new number of bits of the bit vector. + * @param value The bit value of new values, if the vector expands. + */ + void Resize(size_type n, bool value = false); - /** - * Counts the number of 1-bits in the bit vector. Also known as *population - * count* or *Hamming weight*. - * @return The number of bits set to 1. - */ - size_type Count() const; + /** + * Sets a bit at a specific position to a given value. + * @param i The bit position. + * @param bit The value assigned to position *i*. + * @return A reference to the bit vector instance. + */ + BitVector& Set(size_type i, bool bit = true); - /** - * Retrieves the number of blocks of the underlying storage. - * @param The number of blocks that represent `Size()` bits. 
- */ - size_type Blocks() const; + /** + * Sets all bits to 1. + * @return A reference to the bit vector instance. + */ + BitVector& Set(); - /** - * Retrieves the number of bits the bitvector consist of. - * @return The length of the bit vector in bits. - */ - size_type Size() const; + /** + * Resets a bit at a specific position, i.e., sets it to 0. + * @param i The bit position. + * @return A reference to the bit vector instance. + */ + BitVector& Reset(size_type i); - /** - * Checks whether the bit vector is empty. - * @return `true` iff the bitvector has zero length. - */ - bool Empty() const; + /** + * Sets all bits to 0. + * @return A reference to the bit vector instance. + */ + BitVector& Reset(); - /** - * Finds the bit position of of the first 1-bit. - * @return The position of the first bit that equals to one or `npos` if no - * such bit exists. - */ - size_type FindFirst() const; + /** + * Toggles/flips a bit at a specific position. + * @param i The bit position. + * @return A reference to the bit vector instance. + */ + BitVector& Flip(size_type i); - /** - * Finds the next 1-bit from a given starting position. - * - * @param i The index where to start looking. - * - * @return The position of the first bit that equals to 1 after position - * *i* or `npos` if no such bit exists. - */ - size_type FindNext(size_type i) const; + /** + * Computes the complement. + * @return A reference to the bit vector instance. + */ + BitVector& Flip(); - bool Serialize(SerialInfo* info) const; - static BitVector* Unserialize(UnserialInfo* info); + /** Retrieves a single bit. + * @param i The bit position. + * @return A mutable reference to the bit at position *i*. + */ + Reference operator[](size_type i); + + /** + * Retrieves a single bit. + * @param i The bit position. + * @return A const-reference to the bit at position *i*. + */ + const_reference operator[](size_type i) const; + + /** + * Counts the number of 1-bits in the bit vector. Also known as *population + * count* or *Hamming weight*. + * @return The number of bits set to 1. + */ + size_type Count() const; + + /** + * Retrieves the number of blocks of the underlying storage. + * @param The number of blocks that represent `Size()` bits. + */ + size_type Blocks() const; + + /** + * Retrieves the number of bits the bitvector consist of. + * @return The length of the bit vector in bits. + */ + size_type Size() const; + + /** + * Checks whether the bit vector is empty. + * @return `true` iff the bitvector has zero length. + */ + bool Empty() const; + + /** + * Finds the bit position of of the first 1-bit. + * @return The position of the first bit that equals to one or `npos` if no + * such bit exists. + */ + size_type FindFirst() const; + + /** + * Finds the next 1-bit from a given starting position. + * + * @param i The index where to start looking. + * + * @return The position of the first bit that equals to 1 after position + * *i* or `npos` if no such bit exists. + */ + size_type FindNext(size_type i) const; + + /** + * Serializes the bit vector. + * + * @param info The serializaton informationt to use. + * + * @return True if successful. + */ + bool Serialize(SerialInfo* info) const; + + /** + * Unserialize the bit vector. + * + * @param info The serializaton informationt to use. + * + * @return The unserialized bit vector, or null if an error occured. 
+ */ + static BitVector* Unserialize(UnserialInfo* info); protected: - DECLARE_SERIAL(BitVector); + DECLARE_SERIAL(BitVector); private: - /** - * Computes the block index for a given bit position. - */ - static size_type block_index(size_type i) - { - return i / bits_per_block; - } + /** + * Computes the number of excess/unused bits in the bit vector. + */ + block_type extra_bits() const; - /** - * Computes the bit index within a given block for a given bit position. - */ - static block_type bit_index(size_type i) - { - return i % bits_per_block; - } + /** + * If the number of bits in the vector are not not a multiple of + * bitvector::bits_per_block, then the last block exhibits unused bits which + * this function resets. + */ + void zero_unused_bits(); - /** - * Computes the bitmask block to extract a bit a given bit position. - */ - static block_type bit_mask(size_type i) - { - return block_type(1) << bit_index(i); - } + /** + * Looks for the first 1-bit starting at a given position. + * @param i The block index to start looking. + * @return The block index of the first 1-bit starting from *i* or + * `bitvector::npos` if no 1-bit exists. + */ + size_type find_from(size_type i) const; - /** - * Computes the number of blocks needed to represent a given number of - * bits. - * @param bits the number of bits. - * @return The number of blocks to represent *bits* number of bits. - */ - static size_type bits_to_blocks(size_type bits) - { - return bits / bits_per_block - + static_cast(bits % bits_per_block != 0); - } + /** + * Computes the block index for a given bit position. + */ + static size_type block_index(size_type i) + { + return i / bits_per_block; + } - /** - * Computes the bit position first 1-bit in a given block. - * @param block The block to inspect. - * @return The bit position where *block* has its first bit set to 1. - */ - static size_type lowest_bit(block_type block); + /** + * Computes the bit index within a given block for a given bit position. + */ + static block_type bit_index(size_type i) + { + return i % bits_per_block; + } - /** - * Computes the number of excess/unused bits in the bit vector. - */ - block_type extra_bits() const; + /** + * Computes the bitmask block to extract a bit a given bit position. + */ + static block_type bit_mask(size_type i) + { + return block_type(1) << bit_index(i); + } - /** - * If the number of bits in the vector are not not a multiple of - * bitvector::bits_per_block, then the last block exhibits unused bits which - * this function resets. - */ - void zero_unused_bits(); + /** + * Computes the number of blocks needed to represent a given number of + * bits. + * @param bits the number of bits. + * @return The number of blocks to represent *bits* number of bits. + */ + static size_type bits_to_blocks(size_type bits) + { + return bits / bits_per_block + + static_cast(bits % bits_per_block != 0); + } - /** - * Looks for the first 1-bit starting at a given position. - * @param i The block index to start looking. - * @return The block index of the first 1-bit starting from *i* or - * `bitvector::npos` if no 1-bit exists. - */ - size_type find_from(size_type i) const; + /** + * Computes the bit position first 1-bit in a given block. + * @param block The block to inspect. + * @return The bit position where *block* has its first bit set to 1. 
+ */ + static size_type lowest_bit(block_type block); - std::vector bits_; - size_type num_bits_; + std::vector bits; + size_type num_bits; }; } diff --git a/src/probabilistic/BloomFilter.cc b/src/probabilistic/BloomFilter.cc index 1b86ea1441..5613dcce05 100644 --- a/src/probabilistic/BloomFilter.cc +++ b/src/probabilistic/BloomFilter.cc @@ -1,3 +1,5 @@ +// See the file "COPYING" in the main distribution directory for copyright. + #include "BloomFilter.h" #include @@ -8,181 +10,184 @@ using namespace probabilistic; BloomFilter::BloomFilter() - : hasher_(NULL) - { - } + { + hasher = 0; + } -BloomFilter::BloomFilter(const Hasher* hasher) - : hasher_(hasher) - { - } +BloomFilter::BloomFilter(const Hasher* arg_hasher) + { + hasher = arg_hasher; + } BloomFilter::~BloomFilter() - { - if ( hasher_ ) - delete hasher_; - } + { + delete hasher; + } bool BloomFilter::Serialize(SerialInfo* info) const - { - return SerialObj::Serialize(info); - } + { + return SerialObj::Serialize(info); + } BloomFilter* BloomFilter::Unserialize(UnserialInfo* info) - { - return reinterpret_cast( - SerialObj::Unserialize(info, SER_BLOOMFILTER)); - } + { + return reinterpret_cast(SerialObj::Unserialize(info, SER_BLOOMFILTER)); + } bool BloomFilter::DoSerialize(SerialInfo* info) const { DO_SERIALIZE(SER_BLOOMFILTER, SerialObj); - if ( ! SERIALIZE(static_cast(hasher_->K())) ) - return false; - return SERIALIZE_STR(hasher_->Name().c_str(), hasher_->Name().size()); - } + + if ( ! SERIALIZE(static_cast(hasher->K())) ) + return false; + + return SERIALIZE_STR(hasher->Name().c_str(), hasher->Name().size()); + } bool BloomFilter::DoUnserialize(UnserialInfo* info) { DO_UNSERIALIZE(SerialObj); + uint16 k; if ( ! UNSERIALIZE(&k) ) - return false; - const char* name; - if ( ! UNSERIALIZE_STR(&name, 0) ) - return false; - hasher_ = Hasher::Create(k, name); + return false; + + const char* name; + if ( ! UNSERIALIZE_STR(&name, 0) ) + return false; + + hasher = Hasher::Create(k, name); + delete [] name; return true; - } - + } size_t BasicBloomFilter::M(double fp, size_t capacity) - { - double ln2 = std::log(2); - return std::ceil(-(capacity * std::log(fp) / ln2 / ln2)); - } + { + double ln2 = std::log(2); + return std::ceil(-(capacity * std::log(fp) / ln2 / ln2)); + } size_t BasicBloomFilter::K(size_t cells, size_t capacity) - { - double frac = static_cast(cells) / static_cast(capacity); - return std::ceil(frac * std::log(2)); - } + { + double frac = static_cast(cells) / static_cast(capacity); + return std::ceil(frac * std::log(2)); + } BasicBloomFilter* BasicBloomFilter::Merge(const BasicBloomFilter* x, const BasicBloomFilter* y) - { - if ( ! x->hasher_->Equals(y->hasher_) ) - { - reporter->InternalError("incompatible hashers during Bloom filter merge"); - return NULL; - } - BasicBloomFilter* result = new BasicBloomFilter(); - result->hasher_ = x->hasher_->Clone(); - result->bits_ = new BitVector(*x->bits_ | *y->bits_); - return result; - } + { + if ( ! 
x->hasher->Equals(y->hasher) ) + reporter->InternalError("incompatible hashers during BasicBloomFilter merge"); + + BasicBloomFilter* result = new BasicBloomFilter(); + result->hasher = x->hasher->Clone(); + result->bits = new BitVector(*x->bits | *y->bits); + + return result; + } BasicBloomFilter::BasicBloomFilter() - : bits_(NULL) - { - } + { + bits = 0; + } BasicBloomFilter::BasicBloomFilter(const Hasher* hasher, size_t cells) - : BloomFilter(hasher), - bits_(new BitVector(cells)) - { - } + : BloomFilter(hasher) + { + bits = new BitVector(cells); + } IMPLEMENT_SERIAL(BasicBloomFilter, SER_BASICBLOOMFILTER) bool BasicBloomFilter::DoSerialize(SerialInfo* info) const { DO_SERIALIZE(SER_BASICBLOOMFILTER, BloomFilter); - return bits_->Serialize(info); - } + return bits->Serialize(info); + } bool BasicBloomFilter::DoUnserialize(UnserialInfo* info) { DO_UNSERIALIZE(BloomFilter); - bits_ = BitVector::Unserialize(info); - return bits_ != NULL; - } + bits = BitVector::Unserialize(info); + return (bits != 0); + } void BasicBloomFilter::AddImpl(const Hasher::digest_vector& h) - { - for ( size_t i = 0; i < h.size(); ++i ) - bits_->Set(h[i] % bits_->Size()); - } + { + for ( size_t i = 0; i < h.size(); ++i ) + bits->Set(h[i] % bits->Size()); + } size_t BasicBloomFilter::CountImpl(const Hasher::digest_vector& h) const - { - for ( size_t i = 0; i < h.size(); ++i ) - if ( ! (*bits_)[h[i] % bits_->Size()] ) - return 0; - return 1; - } + { + for ( size_t i = 0; i < h.size(); ++i ) + { + if ( ! (*bits)[h[i] % bits->Size()] ) + return 0; + } + return 1; + } CountingBloomFilter* CountingBloomFilter::Merge(const CountingBloomFilter* x, - const CountingBloomFilter* y) - { - if ( ! x->hasher_->Equals(y->hasher_) ) - { - reporter->InternalError("incompatible hashers during Bloom filter merge"); - return NULL; - } - CountingBloomFilter* result = new CountingBloomFilter(); - result->hasher_ = x->hasher_->Clone(); - result->cells_ = new CounterVector(*x->cells_ | *y->cells_); - return result; - } + const CountingBloomFilter* y) + { + if ( ! x->hasher->Equals(y->hasher) ) + reporter->InternalError("incompatible hashers during CountingBloomFilter merge"); + + CountingBloomFilter* result = new CountingBloomFilter(); + result->hasher = x->hasher->Clone(); + result->cells = new CounterVector(*x->cells | *y->cells); + + return result; + } CountingBloomFilter::CountingBloomFilter() - : cells_(NULL) - { - } + { + cells = 0; + } CountingBloomFilter::CountingBloomFilter(const Hasher* hasher, - size_t cells, size_t width) - : BloomFilter(hasher), - cells_(new CounterVector(width, cells)) - { - } - + size_t arg_cells, size_t width) + : BloomFilter(hasher) + { + cells = new CounterVector(width, arg_cells); + } IMPLEMENT_SERIAL(CountingBloomFilter, SER_COUNTINGBLOOMFILTER) bool CountingBloomFilter::DoSerialize(SerialInfo* info) const { DO_SERIALIZE(SER_COUNTINGBLOOMFILTER, BloomFilter); - return cells_->Serialize(info); - } + return cells->Serialize(info); + } bool CountingBloomFilter::DoUnserialize(UnserialInfo* info) { DO_UNSERIALIZE(BloomFilter); - cells_ = CounterVector::Unserialize(info); - return cells_ != NULL; - } + cells = CounterVector::Unserialize(info); + return (cells != 0); + } // TODO: Use partitioning in add/count to allow for reusing CMS bounds. 
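As a rough illustration of the sizing math implemented by BasicBloomFilter::M() and BasicBloomFilter::K() above (worked by hand, not part of the patch): a false-positive rate of 0.01 for 1,000 expected elements yields ceil(1000 * ln(100) / ln(2)^2) = 9586 cells and ceil((9586/1000) * ln(2)) = 7 hash functions. At the script layer that corresponds to something like:

    # Hypothetical usage sketch; parameter values chosen only for the example.
    local bf = bloomfilter_basic_init(0.01, 1000);  # ~9586 cells, 7 hash functions
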
-
 void CountingBloomFilter::AddImpl(const Hasher::digest_vector& h)
-	{
-	for ( size_t i = 0; i < h.size(); ++i )
-		cells_->Increment(h[i] % cells_->Size());
-	}
+	{
+	for ( size_t i = 0; i < h.size(); ++i )
+		cells->Increment(h[i] % cells->Size());
+	}
 
 size_t CountingBloomFilter::CountImpl(const Hasher::digest_vector& h) const
-	{
-	CounterVector::size_type min =
-		std::numeric_limits<CounterVector::size_type>::max();
-	for ( size_t i = 0; i < h.size(); ++i )
-		{
-		CounterVector::size_type cnt = cells_->Count(h[i] % cells_->Size());
-		if ( cnt < min )
-			min = cnt;
-		}
-	return min;
-	}
+	{
+	CounterVector::size_type min =
+		std::numeric_limits<CounterVector::size_type>::max();
+
+	for ( size_t i = 0; i < h.size(); ++i )
+		{
+		CounterVector::size_type cnt = cells->Count(h[i] % cells->Size());
+		if ( cnt < min )
+			min = cnt;
+		}
+
+	return min;
+	}
diff --git a/src/probabilistic/BloomFilter.h b/src/probabilistic/BloomFilter.h
index 2fa849505d..4a6b01c484 100644
--- a/src/probabilistic/BloomFilter.h
+++ b/src/probabilistic/BloomFilter.h
@@ -1,5 +1,7 @@
-#ifndef BloomFilter_h
-#define BloomFilter_h
+// See the file "COPYING" in the main distribution directory for copyright.
+
+#ifndef PROBABILISTIC_BLOOMFILTER_H
+#define PROBABILISTIC_BLOOMFILTER_H
 
 #include
 #include "BitVector.h"
@@ -11,42 +13,65 @@ class CounterVector;
 
 /**
  * The abstract base class for Bloom filters.
+ *
+ * At this point we won't let the user choose the hasher, but we might open
+ * up the interface in the future.
  */
 class BloomFilter : public SerialObj {
 public:
-	// At this point we won't let the user choose the hasher, but we might
-	// open up the interface in the future.
-	virtual ~BloomFilter();
+	/**
+	 * Destructor.
+	 */
+	virtual ~BloomFilter();
 
-	/**
-	 * Adds an element of type T to the Bloom filter.
-	 * @param x The element to add
-	 */
-	template <typename T>
-	void Add(const T& x)
-		{
-		AddImpl((*hasher_)(x));
-		}
+	/**
+	 * Adds an element of type T to the Bloom filter.
+	 * @param x The element to add
+	 */
+	template <typename T>
+	void Add(const T& x)
+		{
+		AddImpl((*hasher)(x));
+		}
 
-	/**
-	 * Retrieves the associated count of a given value.
-	 *
-	 * @param x The value of type `T` to check.
-	 *
-	 * @return The counter associated with *x*.
-	 */
-	template <typename T>
-	size_t Count(const T& x) const
-		{
-		return CountImpl((*hasher_)(x));
-		}
+	/**
+	 * Retrieves the associated count of a given value.
+	 *
+	 * @param x The value of type `T` to check.
+	 *
+	 * @return The counter associated with *x*.
+	 */
+	template <typename T>
+	size_t Count(const T& x) const
+		{
+		return CountImpl((*hasher)(x));
+		}
 
-	bool Serialize(SerialInfo* info) const;
-	static BloomFilter* Unserialize(UnserialInfo* info);
+	/**
+	 * Serializes the Bloom filter.
+	 *
+	 * @param info The serialization information to use.
+	 *
+	 * @return True if successful.
+	 */
+	bool Serialize(SerialInfo* info) const;
+
+	/**
+	 * Unserializes a Bloom filter.
+	 *
+	 * @param info The serialization information to use.
+	 *
+	 * @return The unserialized Bloom filter, or null if an error
+	 * occurred.
+	 */
+	static BloomFilter* Unserialize(UnserialInfo* info);
 
 protected:
-	DECLARE_ABSTRACT_SERIAL(BloomFilter);
+	DECLARE_ABSTRACT_SERIAL(BloomFilter);
 
+	/**
+	 * Default constructor.
+	 */
 	BloomFilter();
 
 	/**
@@ -54,12 +79,28 @@ protected:
 	 *
 	 * @param hasher The hasher to use for this Bloom filter.
 	 */
-	BloomFilter(const Hasher* hasher);
+	BloomFilter(const Hasher* hasher);
 
-	virtual void AddImpl(const Hasher::digest_vector& hashes) = 0;
-	virtual size_t CountImpl(const Hasher::digest_vector& hashes) const = 0;
+	/**
+	 * Abstract method for implementing the *Add* operation.
+ * + * @param hashes A set of *k* hashes for the item to add, computed by + * the internal hasher object. + * + */ + virtual void AddImpl(const Hasher::digest_vector& hashes) = 0; - const Hasher* hasher_; + /** + * Abstract method for implementing the *Count* operation. + * + * @param hashes A set of *k* hashes for the item to add, computed by + * the internal hasher object. + * + * @return Returns the counter associated with the hashed element. + */ + virtual size_t CountImpl(const Hasher::digest_vector& hashes) const = 0; + + const Hasher* hasher; }; /** @@ -67,50 +108,67 @@ protected: */ class BasicBloomFilter : public BloomFilter { public: - /** - * Computes the number of cells based a given false-positive rate and - * capacity. In the literature, this parameter often has the name *M*. - * - * @param fp The false-positive rate. - * - * @param capacity The number of exepected elements. - * - * Returns: The number cells needed to support a false-positive rate of *fp* - * with at most *capacity* elements. - */ - static size_t M(double fp, size_t capacity); + /** + * Constructs a basic Bloom filter with a given number of cells. The + * ideal number of cells can be computed with *M*. + * + * @param hasher The hasher to use. The ideal number of hash + * functions can be computed with *K*. + * + * @param cells The number of cells. + */ + BasicBloomFilter(const Hasher* hasher, size_t cells); - /** - * Computes the optimal number of hash functions based on the number cells - * and expected number of elements. - * - * @param cells The number of cells (*m*). - * - * @param capacity The maximum number of elements. - * - * Returns: the optimal number of hash functions for a false-positive rate of - * *fp* for at most *capacity* elements. - */ - static size_t K(size_t cells, size_t capacity); + /** + * Computes the number of cells based on a given false positive rate + * and capacity. In the literature, this parameter often has the name + * *M*. + * + * @param fp The false positive rate. + * + * @param capacity The expected number of elements that will be + * stored. + * + * Returns: The number cells needed to support a false positive rate + * of *fp* with at most *capacity* elements. + */ + static size_t M(double fp, size_t capacity); - static BasicBloomFilter* Merge(const BasicBloomFilter* x, - const BasicBloomFilter* y); + /** + * Computes the optimal number of hash functions based on the number cells + * and expected number of elements. + * + * @param cells The number of cells (*m*). + * + * @param capacity The maximum number of elements. + * + * Returns: the optimal number of hash functions for a false-positive + * rate of *fp* for at most *capacity* elements. + */ + static size_t K(size_t cells, size_t capacity); - /** - * Constructs a basic Bloom filter with a given number of cells and capacity. - */ - BasicBloomFilter(const Hasher* hasher, size_t cells); + /** + * Merges two basic Bloom filters. + * + * @return The merged Bloom filter. + */ + static BasicBloomFilter* Merge(const BasicBloomFilter* x, + const BasicBloomFilter* y); protected: - DECLARE_SERIAL(BasicBloomFilter); + DECLARE_SERIAL(BasicBloomFilter); - BasicBloomFilter(); + /** + * Default constructor. + */ + BasicBloomFilter(); - virtual void AddImpl(const Hasher::digest_vector& h); - virtual size_t CountImpl(const Hasher::digest_vector& h) const; + // Overridden from BloomFilter. 
+ virtual void AddImpl(const Hasher::digest_vector& h); + virtual size_t CountImpl(const Hasher::digest_vector& h) const; private: - BitVector* bits_; + BitVector* bits; }; /** @@ -118,21 +176,40 @@ private: */ class CountingBloomFilter : public BloomFilter { public: - static CountingBloomFilter* Merge(const CountingBloomFilter* x, - const CountingBloomFilter* y); + /** + * Constructs a counting Bloom filter. + * + * @param hasher The hasher to use. The ideal number of hash + * functions can be computed with *K*. + * + * @param cells The number of cells to use. + * + * @param width The maximal bit-width of counter values. + */ + CountingBloomFilter(const Hasher* hasher, size_t cells, size_t width); - CountingBloomFilter(const Hasher* hasher, size_t cells, size_t width); + /** + * Merges two counting Bloom filters. + * + * @return The merged Bloom filter. + */ + static CountingBloomFilter* Merge(const CountingBloomFilter* x, + const CountingBloomFilter* y); protected: - DECLARE_SERIAL(CountingBloomFilter); + DECLARE_SERIAL(CountingBloomFilter); - CountingBloomFilter(); + /** + * Default constructor. + */ + CountingBloomFilter(); - virtual void AddImpl(const Hasher::digest_vector& h); - virtual size_t CountImpl(const Hasher::digest_vector& h) const; + // Overridden from BloomFilter. + virtual void AddImpl(const Hasher::digest_vector& h); + virtual size_t CountImpl(const Hasher::digest_vector& h) const; private: - CounterVector* cells_; + CounterVector* cells; }; } diff --git a/src/probabilistic/CounterVector.cc b/src/probabilistic/CounterVector.cc index 943749ad46..570ed1f8ea 100644 --- a/src/probabilistic/CounterVector.cc +++ b/src/probabilistic/CounterVector.cc @@ -1,3 +1,5 @@ +// See the file "COPYING" in the main distribution directory for copyright. + #include "CounterVector.h" #include @@ -6,154 +8,176 @@ using namespace probabilistic; -CounterVector::CounterVector(size_t width, size_t cells) - : bits_(new BitVector(width * cells)), - width_(width) - { - } +CounterVector::CounterVector(size_t arg_width, size_t cells) + { + bits = new BitVector(arg_width * cells); + width = arg_width; + } CounterVector::CounterVector(const CounterVector& other) - : bits_(new BitVector(*other.bits_)), - width_(other.width_) - { - } + { + bits = new BitVector(*other.bits); + width = other.width; + } CounterVector::~CounterVector() - { - delete bits_; - } + { + delete bits; + } bool CounterVector::Increment(size_type cell, count_type value) - { - assert(cell < Size()); - assert(value != 0); - size_t lsb = cell * width_; - bool carry = false; - for ( size_t i = 0; i < width_; ++i ) - { - bool b1 = (*bits_)[lsb + i]; - bool b2 = value & (1 << i); - (*bits_)[lsb + i] = b1 ^ b2 ^ carry; - carry = ( b1 && b2 ) || ( carry && ( b1 != b2 ) ); - } - if ( carry ) - for ( size_t i = 0; i < width_; ++i ) - bits_->Set(lsb + i); - return ! carry; - } + { + assert(cell < Size()); + assert(value != 0); + + size_t lsb = cell * width; + bool carry = false; + + for ( size_t i = 0; i < width; ++i ) + { + bool b1 = (*bits)[lsb + i]; + bool b2 = value & (1 << i); + (*bits)[lsb + i] = b1 ^ b2 ^ carry; + carry = ( b1 && b2 ) || ( carry && ( b1 != b2 ) ); + } + + if ( carry ) + { + for ( size_t i = 0; i < width; ++i ) + bits->Set(lsb + i); + } + + return ! 
carry; + } bool CounterVector::Decrement(size_type cell, count_type value) - { - assert(cell < Size()); - assert(value != 0); - value = ~value + 1; // A - B := A + ~B + 1 - bool carry = false; - size_t lsb = cell * width_; - for ( size_t i = 0; i < width_; ++i ) - { - bool b1 = (*bits_)[lsb + i]; - bool b2 = value & (1 << i); - (*bits_)[lsb + i] = b1 ^ b2 ^ carry; - carry = ( b1 && b2 ) || ( carry && ( b1 != b2 ) ); - } - return carry; - } + { + assert(cell < Size()); + assert(value != 0); + + value = ~value + 1; // A - B := A + ~B + 1 + bool carry = false; + size_t lsb = cell * width; + + for ( size_t i = 0; i < width; ++i ) + { + bool b1 = (*bits)[lsb + i]; + bool b2 = value & (1 << i); + (*bits)[lsb + i] = b1 ^ b2 ^ carry; + carry = ( b1 && b2 ) || ( carry && ( b1 != b2 ) ); + } + + return carry; + } CounterVector::count_type CounterVector::Count(size_type cell) const - { - assert(cell < Size()); - size_t cnt = 0, order = 1; - size_t lsb = cell * width_; - for (size_t i = lsb; i < lsb + width_; ++i, order <<= 1) - if ((*bits_)[i]) - cnt |= order; - return cnt; - } + { + assert(cell < Size()); + + size_t cnt = 0, order = 1; + size_t lsb = cell * width; + + for ( size_t i = lsb; i < lsb + width; ++i, order <<= 1 ) + if ( (*bits)[i] ) + cnt |= order; + + return cnt; + } CounterVector::size_type CounterVector::Size() const - { - return bits_->Size() / width_; - } + { + return bits->Size() / width; + } size_t CounterVector::Width() const - { - return width_; - } + { + return width; + } size_t CounterVector::Max() const - { - return std::numeric_limits::max() - >> (std::numeric_limits::digits - width_); - } + { + return std::numeric_limits::max() + >> (std::numeric_limits::digits - width); + } CounterVector& CounterVector::Merge(const CounterVector& other) - { - assert(Size() == other.Size()); - assert(Width() == other.Width()); - for ( size_t cell = 0; cell < Size(); ++cell ) - { - size_t lsb = cell * width_; - bool carry = false; - for ( size_t i = 0; i < width_; ++i ) - { - bool b1 = (*bits_)[lsb + i]; - bool b2 = (*other.bits_)[lsb + i]; - (*bits_)[lsb + i] = b1 ^ b2 ^ carry; - carry = ( b1 && b2 ) || ( carry && ( b1 != b2 ) ); - } - if ( carry ) - for ( size_t i = 0; i < width_; ++i ) - bits_->Set(lsb + i); - } - return *this; - } + { + assert(Size() == other.Size()); + assert(Width() == other.Width()); + + for ( size_t cell = 0; cell < Size(); ++cell ) + { + size_t lsb = cell * width; + bool carry = false; + + for ( size_t i = 0; i < width; ++i ) + { + bool b1 = (*bits)[lsb + i]; + bool b2 = (*other.bits)[lsb + i]; + (*bits)[lsb + i] = b1 ^ b2 ^ carry; + carry = ( b1 && b2 ) || ( carry && ( b1 != b2 ) ); + } + + if ( carry ) + { + for ( size_t i = 0; i < width; ++i ) + bits->Set(lsb + i); + } + } + + return *this; + } namespace probabilistic { CounterVector& CounterVector::operator|=(const CounterVector& other) -{ - return Merge(other); -} + { + return Merge(other); + } CounterVector operator|(const CounterVector& x, const CounterVector& y) -{ - CounterVector cv(x); - return cv |= y; -} + { + CounterVector cv(x); + return cv |= y; + } } bool CounterVector::Serialize(SerialInfo* info) const - { - return SerialObj::Serialize(info); - } + { + return SerialObj::Serialize(info); + } CounterVector* CounterVector::Unserialize(UnserialInfo* info) - { - return reinterpret_cast( - SerialObj::Unserialize(info, SER_COUNTERVECTOR)); - } + { + return reinterpret_cast(SerialObj::Unserialize(info, SER_COUNTERVECTOR)); + } IMPLEMENT_SERIAL(CounterVector, SER_COUNTERVECTOR) bool 
CounterVector::DoSerialize(SerialInfo* info) const { DO_SERIALIZE(SER_COUNTERVECTOR, SerialObj); - if ( ! bits_->Serialize(info) ) - return false; - return SERIALIZE(static_cast(width_)); - } + + if ( ! bits->Serialize(info) ) + return false; + + return SERIALIZE(static_cast(width)); + } bool CounterVector::DoUnserialize(UnserialInfo* info) { DO_UNSERIALIZE(SerialObj); - bits_ = BitVector::Unserialize(info); - if ( ! bits_ ) - return false; - uint64 width; - if ( ! UNSERIALIZE(&width) ) - return false; - width_ = static_cast(width); - return true; - } + bits = BitVector::Unserialize(info); + if ( ! bits ) + return false; + + uint64 width; + if ( ! UNSERIALIZE(&width) ) + return false; + + width = static_cast(width); + + return true; + } diff --git a/src/probabilistic/CounterVector.h b/src/probabilistic/CounterVector.h index 63445ec12d..178a68e8f2 100644 --- a/src/probabilistic/CounterVector.h +++ b/src/probabilistic/CounterVector.h @@ -1,5 +1,7 @@ -#ifndef CounterVector_h -#define CounterVector_h +// See the file "COPYING" in the main distribution directory for copyright. + +#ifndef PROBABILISTIC_COUNTERVECTOR_H +#define PROBABILISTIC_COUNTERVECTOR_H #include "SerialObj.h" @@ -8,123 +10,143 @@ namespace probabilistic { class BitVector; /** - * A vector of counters, each of which have a fixed number of bits. + * A vector of counters, each of which has a fixed number of bits. */ class CounterVector : public SerialObj { - CounterVector& operator=(const CounterVector&); public: - typedef size_t size_type; - typedef uint64 count_type; + typedef size_t size_type; + typedef uint64 count_type; - /** - * Constructs a counter vector having cells of a given width. - * - * @param width The number of bits that each cell occupies. - * - * @param cells The number of cells in the bitvector. - * - * @pre `cells > 0 && width > 0` - */ - CounterVector(size_t width, size_t cells = 1024); + /** + * Constructs a counter vector having cells of a given width. + * + * @param width The number of bits that each cell occupies. + * + * @param cells The number of cells in the bitvector. + * + * @pre `cells > 0 && width > 0` + */ + CounterVector(size_t width, size_t cells = 1024); /** * Copy-constructs a counter vector. * * @param other The counter vector to copy. */ - CounterVector(const CounterVector& other); + CounterVector(const CounterVector& other); - ~CounterVector(); + /** + * Destructor. + */ + ~CounterVector(); - /** - * Increments a given cell. - * - * @param cell The cell to increment. - * - * @param value The value to add to the current counter in *cell*. - * - * @return `true` if adding *value* to the counter in *cell* succeeded. - * - * @pre `cell < Size()` - */ - bool Increment(size_type cell, count_type value = 1); + /** + * Increments a given cell. + * + * @param cell The cell to increment. + * + * @param value The value to add to the current counter in *cell*. + * + * @return `true` if adding *value* to the counter in *cell* succeeded. + * + * @pre `cell < Size()` + */ + bool Increment(size_type cell, count_type value = 1); - /** - * Decrements a given cell. - * - * @param cell The cell to decrement. - * - * @param value The value to subtract from the current counter in *cell*. - * - * @return `true` if subtracting *value* from the counter in *cell* succeeded. - * - * @pre `cell < Size()` - */ - bool Decrement(size_type cell, count_type value = 1); + /** + * Decrements a given cell. + * + * @param cell The cell to decrement. 
+ * + * @param value The value to subtract from the current counter in *cell*. + * + * @return `true` if subtracting *value* from the counter in *cell* succeeded. + * + * @pre `cell < Size()` + */ + bool Decrement(size_type cell, count_type value = 1); - /** - * Retrieves the counter of a given cell. - * - * @param cell The cell index to retrieve the count for. - * - * @return The counter associated with *cell*. - * - * @pre `cell < Size()` - */ - count_type Count(size_type cell) const; + /** + * Retrieves the counter of a given cell. + * + * @param cell The cell index to retrieve the count for. + * + * @return The counter associated with *cell*. + * + * @pre `cell < Size()` + */ + count_type Count(size_type cell) const; - /** - * Retrieves the number of cells in the storage. - * - * @return The number of cells. - */ - size_type Size() const; + /** + * Retrieves the number of cells in the storage. + * + * @return The number of cells. + */ + size_type Size() const; - /** - * Retrieves the counter width. - * - * @return The number of bits per counter. - */ - size_t Width() const; + /** + * Retrieves the counter width. + * + * @return The number of bits per counter. + */ + size_t Width() const; - /** - * Computes the maximum counter value. - * - * @return The maximum counter value based on the width. - */ - size_t Max() const; + /** + * Computes the maximum counter value. + * + * @return The maximum counter value based on the width. + */ + size_t Max() const; - /** - * Merges another counter vector into this instance by *adding* the counters - * of each cells. - * - * @param other The counter vector to merge into this instance. - * - * @return A reference to `*this`. - * - * @pre `Size() == other.Size() && Width() == other.Width()` - */ - CounterVector& Merge(const CounterVector& other); + /** + * Merges another counter vector into this instance by *adding* the + * counters of each cells. + * + * @param other The counter vector to merge into this instance. + * + * @return A reference to `*this`. + * + * @pre `Size() == other.Size() && Width() == other.Width()` + */ + CounterVector& Merge(const CounterVector& other); - /** - * An alias for ::Merge. - */ - CounterVector& operator|=(const CounterVector& other); + /** + * An alias for ::Merge. + */ + CounterVector& operator|=(const CounterVector& other); - friend CounterVector operator|(const CounterVector& x, - const CounterVector& y); + /** + * Serializes the bit vector. + * + * @param info The serializaton information to use. + * + * @return True if successful. + */ + bool Serialize(SerialInfo* info) const; - bool Serialize(SerialInfo* info) const; - static CounterVector* Unserialize(UnserialInfo* info); + /** + * Unserialize the counter vector. + * + * @param info The serializaton information to use. + * + * @return The unserialized counter vector, or null if an error + * occured. + */ + static CounterVector* Unserialize(UnserialInfo* info); protected: - DECLARE_SERIAL(CounterVector); + friend CounterVector operator|(const CounterVector& x, + const CounterVector& y); - CounterVector() { } + CounterVector() { } + + DECLARE_SERIAL(CounterVector); private: - BitVector* bits_; - size_t width_; + CounterVector& operator=(const CounterVector&); // Disable. 
+ + BitVector* bits; + size_t width; }; } diff --git a/src/probabilistic/Hasher.cc b/src/probabilistic/Hasher.cc index c2f1110ecd..f9ce7bdd6b 100644 --- a/src/probabilistic/Hasher.cc +++ b/src/probabilistic/Hasher.cc @@ -1,66 +1,70 @@ +// See the file "COPYING" in the main distribution directory for copyright. #include #include "Hasher.h" - #include "digest.h" using namespace probabilistic; -Hasher::UHF::UHF(size_t seed, const std::string& extra) - : h_(compute_seed(seed, extra)) +UHF::UHF(size_t seed, const std::string& extra) + : h(compute_seed(seed, extra)) { } -Hasher::digest Hasher::UHF::hash(const void* x, size_t n) const +Hasher::digest UHF::hash(const void* x, size_t n) const { assert(n <= UHASH_KEY_SIZE); - return n == 0 ? 0 : h_(x, n); + return n == 0 ? 0 : h(x, n); } -size_t Hasher::UHF::compute_seed(size_t seed, const std::string& extra) +size_t UHF::compute_seed(size_t seed, const std::string& extra) { u_char buf[SHA256_DIGEST_LENGTH]; SHA256_CTX ctx; sha256_init(&ctx); + if ( extra.empty() ) { unsigned int first_seed = initial_seed(); sha256_update(&ctx, &first_seed, sizeof(first_seed)); } - else - { - sha256_update(&ctx, extra.c_str(), extra.size()); - } - sha256_update(&ctx, &seed, sizeof(seed)); - sha256_final(&ctx, buf); - // Take the first sizeof(size_t) bytes as seed. - return *reinterpret_cast(buf); - } + else + sha256_update(&ctx, extra.c_str(), extra.size()); + + sha256_update(&ctx, &seed, sizeof(seed)); + sha256_final(&ctx, buf); + + // Take the first sizeof(size_t) bytes as seed. + return *reinterpret_cast(buf); + } Hasher* Hasher::Create(size_t k, const std::string& name) { return new DefaultHasher(k, name); } -Hasher::Hasher(size_t k, const std::string& name) - : k_(k), name_(name) +Hasher::Hasher(size_t k, const std::string& arg_name) + : k(k) { + name = arg_name; } DefaultHasher::DefaultHasher(size_t k, const std::string& name) : Hasher(k, name) { for ( size_t i = 0; i < k; ++i ) - hash_functions_.push_back(UHF(i, name)); + hash_functions.push_back(UHF(i, name)); } Hasher::digest_vector DefaultHasher::Hash(const void* x, size_t n) const { digest_vector h(K(), 0); + for ( size_t i = 0; i < h.size(); ++i ) - h[i] = hash_functions_[i](x, n); + h[i] = hash_functions[i](x, n); + return h; } @@ -73,24 +77,25 @@ bool DefaultHasher::Equals(const Hasher* other) const { if ( typeid(*this) != typeid(*other) ) return false; + const DefaultHasher* o = static_cast(other); - return hash_functions_ == o->hash_functions_; + return hash_functions == o->hash_functions; } DoubleHasher::DoubleHasher(size_t k, const std::string& name) - : Hasher(k, name), - h1_(1, name), - h2_(2, name) + : Hasher(k, name), h1(1, name), h2(2, name) { } Hasher::digest_vector DoubleHasher::Hash(const void* x, size_t n) const { - digest h1 = h1_(x, n); - digest h2 = h2_(x, n); + digest d1 = h1(x, n); + digest d2 = h2(x, n); digest_vector h(K(), 0); + for ( size_t i = 0; i < h.size(); ++i ) - h[i] = h1 + i * h2; + h[i] = d1 + i * d2; + return h; } @@ -103,7 +108,7 @@ bool DoubleHasher::Equals(const Hasher* other) const { if ( typeid(*this) != typeid(*other) ) return false; - const DoubleHasher* o = static_cast(other); - return h1_ == o->h1_ && h2_ == o->h2_; - } + const DoubleHasher* o = static_cast(other); + return h1 == o->h1 && h2 == o->h2; + } diff --git a/src/probabilistic/Hasher.h b/src/probabilistic/Hasher.h index 0231343dcd..62c5d58d1f 100644 --- a/src/probabilistic/Hasher.h +++ b/src/probabilistic/Hasher.h @@ -1,5 +1,7 @@ -#ifndef Hasher_h -#define Hasher_h +// See the file "COPYING" in the main 
distribution directory for copyright. + +#ifndef PROBABILISTIC_HASHER_H +#define PROBABILISTIC_HASHER_H #include "Hash.h" #include "H3.h" @@ -7,123 +9,197 @@ namespace probabilistic { /** - * The abstract base class for hashers, i.e., constructs which hash elements - * *k* times. + * Abstract base class for hashers. A hasher creates a family of hash + * functions to hash an element *k* times. */ class Hasher { public: - typedef hash_t digest; - typedef std::vector digest_vector; + typedef hash_t digest; + typedef std::vector digest_vector; - /** - * Constructs the hashing policy used by the implementation. - * - * @todo This factory function exists because the HashingPolicy class - * hierachy is not yet serializable. - */ + /** + * Destructor. + */ + virtual ~Hasher() { } + + /** + * Computes hash values for an element. + * + * @param x The element to hash. + * + * @return Vector of *k* hash values. + */ + template + digest_vector operator()(const T& x) const + { + return Hash(&x, sizeof(T)); + } + + /** + * Computes the hashes for a set of bytes. + * + * @param x Pointer to first byte to hash. + * + * @param n Number of bytes to hash. + * + * @return Vector of *k* hash values. + * + */ + virtual digest_vector Hash(const void* x, size_t n) const = 0; + + /** + * Returns a deep copy of the hasher. + */ + virtual Hasher* Clone() const = 0; + + /** + * Returns true if two hashers are identical. + */ + virtual bool Equals(const Hasher* other) const = 0; + + /** + * Returns the number *k* of hash functions the hashers applies. + */ + size_t K() const { return k; } + + /** + * Returns the hasher's name. TODO: What's this? + */ + const std::string& Name() const { return name; } + + /** + * Constructs the hasher used by the implementation. This hardcodes a + * specific hashing policy. It exists only because the HashingPolicy + * class hierachy is not yet serializable. + * + * @param k The number of hash functions to apply. + * + * @param name The hasher's name. + * + * @return Returns a new hasher instance. + */ static Hasher* Create(size_t k, const std::string& name); - virtual ~Hasher() { } - - template - digest_vector operator()(const T& x) const - { - return Hash(&x, sizeof(T)); - } - - virtual digest_vector Hash(const void* x, size_t n) const = 0; - - virtual Hasher* Clone() const = 0; - - virtual bool Equals(const Hasher* other) const = 0; - - size_t K() const { return k_; } - const std::string& Name() const { return name_; } - protected: - /** - * A universal hash function family. - */ - class UHF { - public: - /** - * Constructs an H3 hash function seeded with a given seed and an optional - * extra seed to replace the initial Bro seed. - * - * @param seed The seed to use for this instance. - * - * @param extra If not empty, this parameter replaces the initial seed to - * compute the seed for t to compute the - * seed - * NUL-terminated string as additional seed. - */ - UHF(size_t seed, const std::string& extra = ""); + Hasher(size_t k, const std::string& name); - template - digest operator()(const T& x) const - { - return hash(&x, sizeof(T)); - } - - digest operator()(const void* x, size_t n) const - { - return hash(x, n); - } - - friend bool operator==(const UHF& x, const UHF& y) - { - return x.h_ == y.h_; - } - - friend bool operator!=(const UHF& x, const UHF& y) - { - return ! 
(x == y); - } - - digest hash(const void* x, size_t n) const; - - private: - static size_t compute_seed(size_t seed, const std::string& extra); - - H3 h_; - }; - - Hasher(size_t k, const std::string& name); - -private: - const size_t k_; - std::string name_; + private: + const size_t k; + std::string name; }; /** - * The default hashing policy. Performs *k* hash function computations. + * A universal hash function family. This is a helper class that Hasher + * implementations can use in their implementation. + */ +class UHF { +public: + /** + * Constructs an H3 hash function seeded with a given seed and an + * optional extra seed to replace the initial Bro seed. + * + * @param seed The seed to use for this instance. + * + * @param extra If not empty, this parameter replaces the initial + * seed to compute the seed for t to compute the seed NUL-terminated + * string as additional seed. + */ + UHF(size_t seed, const std::string& extra = ""); + + template + Hasher::digest operator()(const T& x) const + { + return hash(&x, sizeof(T)); + } + + /** + * Computes hash values for an element. + * + * @param x The element to hash. + * + * @return Vector of *k* hash values. + */ + Hasher::digest operator()(const void* x, size_t n) const + { + return hash(x, n); + } + + /** + * Computes the hashes for a set of bytes. + * + * @param x Pointer to first byte to hash. + * + * @param n Number of bytes to hash. + * + * @return Vector of *k* hash values. + * + */ + Hasher::digest hash(const void* x, size_t n) const; + + friend bool operator==(const UHF& x, const UHF& y) + { + return x.h == y.h; + } + + friend bool operator!=(const UHF& x, const UHF& y) + { + return ! (x == y); + } + +private: + static size_t compute_seed(size_t seed, const std::string& extra); + + H3 h; +}; + + +/** + * A hasher implementing the default hashing policy. Uses *k* separate hash + * functions internally. */ class DefaultHasher : public Hasher { public: - DefaultHasher(size_t k, const std::string& name); + /** + * Constructor for a hasher with *k* hash functions. + * + * @param k The number of hash functions to use. + * + * @param name The name of the hasher. + */ + DefaultHasher(size_t k, const std::string& name); - virtual digest_vector Hash(const void* x, size_t n) const /* final */; - virtual DefaultHasher* Clone() const /* final */; - virtual bool Equals(const Hasher* other) const /* final */; + // Overridden from Hasher. + virtual digest_vector Hash(const void* x, size_t n) const /* final */; + virtual DefaultHasher* Clone() const /* final */; + virtual bool Equals(const Hasher* other) const /* final */; private: - std::vector hash_functions_; + std::vector hash_functions; }; /** - * The *double-hashing* policy. Uses a linear combination of two hash functions. + * The *double-hashing* policy. Uses a linear combination of two hash + * functions. */ class DoubleHasher : public Hasher { public: - DoubleHasher(size_t k, const std::string& name); + /** + * Constructor for a double hasher with *k* hash functions. + * + * @param k The number of hash functions to use. + * + * @param name The name of the hasher. + */ + DoubleHasher(size_t k, const std::string& name); - virtual digest_vector Hash(const void* x, size_t n) const /* final */; - virtual DoubleHasher* Clone() const /* final */; - virtual bool Equals(const Hasher* other) const /* final */; + // Overridden from Hasher. 
+ virtual digest_vector Hash(const void* x, size_t n) const /* final */; + virtual DoubleHasher* Clone() const /* final */; + virtual bool Equals(const Hasher* other) const /* final */; private: - UHF h1_; - UHF h2_; + UHF h1; + UHF h2; }; } diff --git a/src/probabilistic/bloom-filter.bif b/src/probabilistic/bloom-filter.bif index 3c409b1b0f..cbbff85d7d 100644 --- a/src/probabilistic/bloom-filter.bif +++ b/src/probabilistic/bloom-filter.bif @@ -31,18 +31,19 @@ module GLOBAL; ## Returns: A Bloom filter handle. function bloomfilter_basic_init%(fp: double, capacity: count, name: string &default=""%): opaque of bloomfilter - %{ - if ( fp < 0.0 || fp > 1.0 ) - { - reporter->Error("false-positive rate must take value between 0 and 1"); - return NULL; - } + %{ + if ( fp < 0.0 || fp > 1.0 ) + { + reporter->Error("false-positive rate must take value between 0 and 1"); + return 0; + } - size_t cells = BasicBloomFilter::M(fp, capacity); - size_t optimal_k = BasicBloomFilter::K(cells, capacity); - const Hasher* h = Hasher::Create(optimal_k, name->CheckString()); - return new BloomFilterVal(new BasicBloomFilter(h, cells)); - %} + size_t cells = BasicBloomFilter::M(fp, capacity); + size_t optimal_k = BasicBloomFilter::K(cells, capacity); + const Hasher* h = Hasher::Create(optimal_k, name->CheckString()); + + return new BloomFilterVal(new BasicBloomFilter(h, cells)); + %} ## Creates a counting Bloom filter. ## @@ -59,20 +60,22 @@ function bloomfilter_basic_init%(fp: double, capacity: count, ## ## Returns: A Bloom filter handle. function bloomfilter_counting_init%(k: count, cells: count, max: count, - name: string &default=""%): opaque of bloomfilter - %{ - if ( max == 0 ) - { - reporter->Error("max counter value must be greater than 0"); - return NULL; - } + name: string &default=""%): opaque of bloomfilter + %{ + if ( max == 0 ) + { + reporter->Error("max counter value must be greater than 0"); + return 0; + } - const Hasher* h = Hasher::Create(k, name->CheckString()); - uint16 width = 1; - while ( max >>= 1 ) - ++width; - return new BloomFilterVal(new CountingBloomFilter(h, cells, width)); - %} + const Hasher* h = Hasher::Create(k, name->CheckString()); + + uint16 width = 1; + while ( max >>= 1 ) + ++width; + + return new BloomFilterVal(new CountingBloomFilter(h, cells, width)); + %} ## Adds an element to a Bloom filter. ## @@ -80,16 +83,20 @@ function bloomfilter_counting_init%(k: count, cells: count, max: count, ## ## x: The element to add. function bloomfilter_add%(bf: opaque of bloomfilter, x: any%): any - %{ - BloomFilterVal* bfv = static_cast(bf); - if ( ! bfv->Type() && ! bfv->Typify(x->Type()) ) - reporter->Error("failed to set Bloom filter type"); - else if ( bfv->Type() != x->Type() ) - reporter->Error("incompatible Bloom filter types"); - else - bfv->Add(x); - return NULL; - %} + %{ + BloomFilterVal* bfv = static_cast(bf); + + if ( ! bfv->Type() && ! bfv->Typify(x->Type()) ) + reporter->Error("failed to set Bloom filter type"); + + else if ( ! same_type(bfv->Type(), x->Type()) ) + reporter->Error("incompatible Bloom filter types"); + + else + bfv->Add(x); + + return 0; + %} ## Retrieves the counter for a given element in a Bloom filter. ## @@ -99,16 +106,20 @@ function bloomfilter_add%(bf: opaque of bloomfilter, x: any%): any ## ## Returns: the counter associated with *x* in *bf*. function bloomfilter_lookup%(bf: opaque of bloomfilter, x: any%): count - %{ - const BloomFilterVal* bfv = static_cast(bf); - if ( ! 
bfv->Type() ) - reporter->Error("cannot perform lookup on untyped Bloom filter"); - else if ( bfv->Type() != x->Type() ) - reporter->Error("incompatible Bloom filter types"); - else - return new Val(static_cast(bfv->Count(x)), TYPE_COUNT); - return new Val(0, TYPE_COUNT); - %} + %{ + const BloomFilterVal* bfv = static_cast(bf); + + if ( ! bfv->Type() ) + reporter->Error("cannot perform lookup on untyped Bloom filter"); + + else if ( ! same_type(bfv->Type(), x->Type()) ) + reporter->Error("incompatible Bloom filter types"); + + else + return new Val(static_cast(bfv->Count(x)), TYPE_COUNT); + + return new Val(0, TYPE_COUNT); + %} ## Merges two Bloom filters. ## @@ -118,13 +129,16 @@ function bloomfilter_lookup%(bf: opaque of bloomfilter, x: any%): count ## ## Returns: The union of *bf1* and *bf2*. function bloomfilter_merge%(bf1: opaque of bloomfilter, - bf2: opaque of bloomfilter%): opaque of bloomfilter - %{ - const BloomFilterVal* bfv1 = static_cast(bf1); - const BloomFilterVal* bfv2 = static_cast(bf2); - if ( bfv1->Type() != bfv2->Type() ) - reporter->Error("incompatible Bloom filter types"); - else - return BloomFilterVal::Merge(bfv1, bfv2); - return NULL; - %} + bf2: opaque of bloomfilter%): opaque of bloomfilter + %{ + const BloomFilterVal* bfv1 = static_cast(bf1); + const BloomFilterVal* bfv2 = static_cast(bf2); + + if ( ! same_type(bfv1->Type(), bfv2->Type()) ) + { + reporter->Error("incompatible Bloom filter types"); + return 0; + } + + return BloomFilterVal::Merge(bfv1, bfv2); + %} diff --git a/src/util.cc b/src/util.cc index 81ec135f98..6bea2eb7f1 100644 --- a/src/util.cc +++ b/src/util.cc @@ -803,10 +803,10 @@ void init_random_seed(uint32 seed, const char* read_file, const char* write_file bro_srandom(seed, seeds_done); if ( ! first_seed_saved ) - { - first_seed = seed; - first_seed_saved = true; - } + { + first_seed = seed; + first_seed_saved = true; + } if ( ! hmac_key_set ) { @@ -820,9 +820,9 @@ void init_random_seed(uint32 seed, const char* read_file, const char* write_file } unsigned int initial_seed() - { - return first_seed; -} + { + return first_seed; + } bool have_random_seed() { @@ -830,7 +830,7 @@ bool have_random_seed() } long int bro_prng(long int state) - { + { // Use our own simple linear congruence PRNG to make sure we are // predictable across platforms. static const long int m = 2147483647; @@ -844,14 +844,14 @@ long int bro_prng(long int state) state += m; return state; - } + } long int bro_random() { if ( ! bro_rand_determistic ) return random(); // Use system PRNG. - bro_rand_state = bro_prng(bro_rand_state); + bro_rand_state = bro_prng(bro_rand_state); return bro_rand_state; } diff --git a/src/util.h b/src/util.h index 5689253d95..aaad2d9403 100644 --- a/src/util.h +++ b/src/util.h @@ -166,15 +166,15 @@ extern void init_random_seed(uint32 seed, const char* load_file, const char* write_file); // Retrieves the initial seed computed after the very first call to -// init_random_seed(). Repeated calls to init_random_seed() will not affect the -// return value of this function. +// init_random_seed(). Repeated calls to init_random_seed() will not affect +// the return value of this function. unsigned int initial_seed(); // Returns true if the user explicitly set a seed via init_random_seed(); extern bool have_random_seed(); -// A simple linear congruence PRNG. It takes its state as argument and returns -// a new random value, which can serve as state for subsequent calls. +// A simple linear congruence PRNG. 
It takes its state as argument and +// returns a new random value, which can serve as state for subsequent calls. long int bro_prng(long int state); // Replacement for the system random(), to which is normally falls back diff --git a/testing/btest/Baseline/bifs.bloomfilter/output b/testing/btest/Baseline/bifs.bloomfilter/output index 4fe2ae1ecc..14e1f038c0 100644 --- a/testing/btest/Baseline/bifs.bloomfilter/output +++ b/testing/btest/Baseline/bifs.bloomfilter/output @@ -1,3 +1,9 @@ +error: incompatible Bloom filter types +error: incompatible Bloom filter types +error: incompatible Bloom filter types +error: incompatible Bloom filter types +error: false-positive rate must take value between 0 and 1 +error: false-positive rate must take value between 0 and 1 0 1 1 diff --git a/testing/btest/bifs/bloomfilter.bro b/testing/btest/bifs/bloomfilter.bro index f69ddbda0c..3b40f29553 100644 --- a/testing/btest/bifs/bloomfilter.bro +++ b/testing/btest/bifs/bloomfilter.bro @@ -1,4 +1,4 @@ -# @TEST-EXEC: bro -b %INPUT >output +# @TEST-EXEC: bro -b %INPUT >output 2>&1 # @TEST-EXEC: btest-diff output function test_basic_bloom_filter() From c89f61917b8b7a6ab8014fad211c879681c3ad5f Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Tue, 23 Jul 2013 18:44:22 -0700 Subject: [PATCH 095/118] Updating NEWS. --- NEWS | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/NEWS b/NEWS index 1fce6b1d9d..b1a5adc12b 100644 --- a/NEWS +++ b/NEWS @@ -108,6 +108,18 @@ New Functionality shunting, and sampling; plus plugin support to customize filters dynamically. +- Bro now provides Bloom filters of two kinds: basic Bloom filters + supporting membership tests, and counting Bloom filters that track + the frequency of elements. The corresponding functions are: + + bloomfilter_basic_init(fp: double, capacity: count, name: string &default=""): opaque of bloomfilter + bloomfilter_counting_init(k: count, cells: count, max: count, name: string &default=""): opaque of bloomfilter + bloomfilter_add(bf: opaque of bloomfilter, x: any) + bloomfilter_lookup(bf: opaque of bloomfilter, x: any): count + bloomfilter_merge(bf1: opaque of bloomfilter, bf2: opaque of bloomfilter): opaque of bloomfilter + + See TODO for full documentation. + Changed Functionality ~~~~~~~~~~~~~~~~~~~~~ From 75814e58e481f723868b644ba9fd06dba2fffa20 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Wed, 24 Jul 2013 00:35:46 -0400 Subject: [PATCH 096/118] Fix a bug with getting analyzer tags. --- src/analyzer/analyzer.bif | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/analyzer/analyzer.bif b/src/analyzer/analyzer.bif index 8b5a85956c..4d70816075 100644 --- a/src/analyzer/analyzer.bif +++ b/src/analyzer/analyzer.bif @@ -46,5 +46,6 @@ function __name%(atype: Analyzer::Tag%) : string function __tag%(name: string%) : Analyzer::Tag %{ - return new Val(analyzer_mgr->GetAnalyzerTag(name->CheckString()), TYPE_ENUM); + analyzer::Tag t = analyzer_mgr->GetAnalyzerTag(name->CheckString()); + return t.AsEnumVal()->Ref(); %} From 5383e8f75bae11bc5da30acf0b77493b90e5f71c Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Wed, 24 Jul 2013 11:21:10 +0200 Subject: [PATCH 097/118] Add bloomfilter_clear() BiF. 
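
A minimal Bro-script sketch of how the new BiF combines with the existing
Bloom filter functions; the parameter values are illustrative only:

    event bro_init()
        {
        local bf = bloomfilter_basic_init(0.01, 1000);
        bloomfilter_add(bf, "foo");
        print bloomfilter_lookup(bf, "foo");   # prints 1
        bloomfilter_clear(bf);
        # The element type and hasher seed are kept; only the contents go away.
        print bloomfilter_lookup(bf, "foo");   # prints 0
        }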
--- src/OpaqueVal.cc | 5 +++++ src/OpaqueVal.h | 1 + src/probabilistic/BloomFilter.cc | 10 ++++++++++ src/probabilistic/BloomFilter.h | 11 +++++++++++ src/probabilistic/CounterVector.cc | 5 +++++ src/probabilistic/CounterVector.h | 5 +++++ src/probabilistic/bloom-filter.bif | 16 ++++++++++++++++ 7 files changed, 53 insertions(+) diff --git a/src/OpaqueVal.cc b/src/OpaqueVal.cc index efdd890f70..19a372c005 100644 --- a/src/OpaqueVal.cc +++ b/src/OpaqueVal.cc @@ -578,6 +578,11 @@ size_t BloomFilterVal::Count(const Val* val) const return cnt; } +void BloomFilterVal::Clear() + { + bloom_filter->Clear(); + } + BloomFilterVal* BloomFilterVal::Merge(const BloomFilterVal* x, const BloomFilterVal* y) { diff --git a/src/OpaqueVal.h b/src/OpaqueVal.h index ea704cb70a..cfb184fc77 100644 --- a/src/OpaqueVal.h +++ b/src/OpaqueVal.h @@ -125,6 +125,7 @@ public: void Add(const Val* val); size_t Count(const Val* val) const; + void Clear(); static BloomFilterVal* Merge(const BloomFilterVal* x, const BloomFilterVal* y); diff --git a/src/probabilistic/BloomFilter.cc b/src/probabilistic/BloomFilter.cc index 5613dcce05..c78cd4193d 100644 --- a/src/probabilistic/BloomFilter.cc +++ b/src/probabilistic/BloomFilter.cc @@ -74,6 +74,11 @@ size_t BasicBloomFilter::K(size_t cells, size_t capacity) return std::ceil(frac * std::log(2)); } +void BasicBloomFilter::Clear() + { + bits->Clear(); + } + BasicBloomFilter* BasicBloomFilter::Merge(const BasicBloomFilter* x, const BasicBloomFilter* y) { @@ -191,3 +196,8 @@ size_t CountingBloomFilter::CountImpl(const Hasher::digest_vector& h) const return min; } + +void CountingBloomFilter::Clear() + { + cells->Clear(); + } diff --git a/src/probabilistic/BloomFilter.h b/src/probabilistic/BloomFilter.h index 4a6b01c484..55bc76fca7 100644 --- a/src/probabilistic/BloomFilter.h +++ b/src/probabilistic/BloomFilter.h @@ -47,6 +47,11 @@ public: return CountImpl((*hasher)(x)); } + /** + * Removes all elements, i.e., resets all bits in the underlying bit vector. + */ + virtual void Clear() = 0; + /** * Serializes the Bloom filter. * @@ -147,6 +152,9 @@ public: */ static size_t K(size_t cells, size_t capacity); + // Overridden from BloomFilter. + virtual void Clear(); + /** * Merges two basic Bloom filters. * @@ -188,6 +196,9 @@ public: */ CountingBloomFilter(const Hasher* hasher, size_t cells, size_t width); + // Overridden from BloomFilter. + virtual void Clear(); + /** * Merges two counting Bloom filters. * diff --git a/src/probabilistic/CounterVector.cc b/src/probabilistic/CounterVector.cc index 570ed1f8ea..00fa7fb8c0 100644 --- a/src/probabilistic/CounterVector.cc +++ b/src/probabilistic/CounterVector.cc @@ -70,6 +70,11 @@ bool CounterVector::Decrement(size_type cell, count_type value) return carry; } +void CounterVector::Clear() + { + bits->Clear(); + } + CounterVector::count_type CounterVector::Count(size_type cell) const { assert(cell < Size()); diff --git a/src/probabilistic/CounterVector.h b/src/probabilistic/CounterVector.h index 178a68e8f2..896f98ef1e 100644 --- a/src/probabilistic/CounterVector.h +++ b/src/probabilistic/CounterVector.h @@ -77,6 +77,11 @@ public: */ count_type Count(size_type cell) const; + /** + * Sets all counters to 0. + */ + void Clear(); + /** * Retrieves the number of cells in the storage. 
*
diff --git a/src/probabilistic/bloom-filter.bif b/src/probabilistic/bloom-filter.bif
index cbbff85d7d..9df168be0e 100644
--- a/src/probabilistic/bloom-filter.bif
+++ b/src/probabilistic/bloom-filter.bif
@@ -121,6 +121,22 @@ function bloomfilter_lookup%(bf: opaque of bloomfilter, x: any%): count
 	return new Val(0, TYPE_COUNT);
 	%}
 
+## Removes all elements from a Bloom filter. This function resets all bits
+## in the underlying bit vector to 0 but does not change the parameterization
+## of the Bloom filter, such as the element type and the hasher seed.
+##
+## bf: The Bloom filter handle.
+function bloomfilter_clear%(bf: opaque of bloomfilter%): any
+	%{
+	BloomFilterVal* bfv = static_cast<BloomFilterVal*>(bf);
+
+	if ( bfv->Type() ) // Untyped Bloom filters are already empty.
+		bfv->Clear();
+
+	return 0;
+	%}
+
+
 ## Merges two Bloom filters.
 ##
 ## bf1: The first Bloom filter handle.
From 5736aef440574389dda6555642ee7e938156dcf1 Mon Sep 17 00:00:00 2001
From: Matthias Vallentin
Date: Wed, 24 Jul 2013 13:05:38 +0200
Subject: [PATCH 098/118] Refactor Bloom filter merging.

---
 src/OpaqueVal.cc | 31 ++++++++---
 src/OpaqueVal.h | 22 --------
 src/probabilistic/BloomFilter.cc | 92 +++++++++++++++++++++++---------
 src/probabilistic/BloomFilter.h | 36 +++++++------
 4 files changed, 109 insertions(+), 72 deletions(-)

diff --git a/src/OpaqueVal.cc b/src/OpaqueVal.cc
index 19a372c005..feff4f3cc0 100644
--- a/src/OpaqueVal.cc
+++ b/src/OpaqueVal.cc
@@ -584,21 +584,36 @@ void BloomFilterVal::Clear()
 	}
 
 BloomFilterVal* BloomFilterVal::Merge(const BloomFilterVal* x,
-				      const BloomFilterVal* y)
+                                      const BloomFilterVal* y)
 	{
 	if ( ! same_type(x->Type(), y->Type()) )
+		{
 		reporter->InternalError("cannot merge Bloom filters with different types");
+		return 0;
+		}
 
-	BloomFilterVal* result;
+	if ( typeid(*x->bloom_filter) != typeid(*y->bloom_filter) )
+		{
+		reporter->InternalError("cannot merge different Bloom filter types");
+		return 0;
+		}
 
-	if ( (result = DoMerge(x, y)) )
-		return result;
+	probabilistic::BloomFilter* copy = x->bloom_filter->Clone();
+	bool success = copy->Merge(y->bloom_filter);
+	if ( ! success )
+		{
+		reporter->InternalError("failed to merge Bloom filter");
+		return 0;
+		}
 
-	else if ( (result = DoMerge(x, y)) )
-		return result;
+	BloomFilterVal* merged = new BloomFilterVal(copy);
+	if ( ! merged->Typify(x->Type()) )
+		{
+		reporter->InternalError("failed to set type on merged Bloom filter");
+		return 0;
+		}
 
-	reporter->InternalError("failed to merge Bloom filters");
-	return 0;
+	return merged;
 	}
 
 BloomFilterVal::~BloomFilterVal()
diff --git a/src/OpaqueVal.h b/src/OpaqueVal.h
index cfb184fc77..360bb69803 100644
--- a/src/OpaqueVal.h
+++ b/src/OpaqueVal.h
@@ -142,28 +142,6 @@ private:
 	BloomFilterVal(const BloomFilterVal&);
 	BloomFilterVal& operator=(const BloomFilterVal&);
 
-	template <typename T>
-	static BloomFilterVal* DoMerge(const BloomFilterVal* x,
-				       const BloomFilterVal* y)
-		{
-		if ( typeid(*x->bloom_filter) != typeid(*y->bloom_filter) )
-			reporter->InternalError("cannot merge different Bloom filter types");
-
-		if ( typeid(T) != typeid(*x->bloom_filter) )
-			return 0;
-
-		const T* a = static_cast<const T*>(x->bloom_filter);
-		const T* b = static_cast<const T*>(y->bloom_filter);
-
-		BloomFilterVal* merged = new BloomFilterVal(T::Merge(a, b));
-		assert(merged);
-
-		if ( ! 
merged->Typify(x->Type()) ) - reporter->InternalError("failed to set type on merged Bloom filter"); - - return merged; - } - BroType* type; CompositeHash* hash; probabilistic::BloomFilter* bloom_filter; diff --git a/src/probabilistic/BloomFilter.cc b/src/probabilistic/BloomFilter.cc index c78cd4193d..132cf376ec 100644 --- a/src/probabilistic/BloomFilter.cc +++ b/src/probabilistic/BloomFilter.cc @@ -79,17 +79,37 @@ void BasicBloomFilter::Clear() bits->Clear(); } -BasicBloomFilter* BasicBloomFilter::Merge(const BasicBloomFilter* x, - const BasicBloomFilter* y) +bool BasicBloomFilter::Merge(const BloomFilter* other) { - if ( ! x->hasher->Equals(y->hasher) ) - reporter->InternalError("incompatible hashers during BasicBloomFilter merge"); + if ( typeid(*this) != typeid(*other) ) + return 0; - BasicBloomFilter* result = new BasicBloomFilter(); - result->hasher = x->hasher->Clone(); - result->bits = new BitVector(*x->bits | *y->bits); + const BasicBloomFilter* o = static_cast(other); - return result; + if ( ! hasher->Equals(o->hasher) ) + { + reporter->InternalError("incompatible hashers in BasicBloomFilter merge"); + return false; + } + else if ( bits->Size() != o->bits->Size() ) + { + reporter->InternalError("different bitvector size in BasicBloomFilter merge"); + return false; + } + + (*bits) |= *o->bits; + + return true; + } + +BasicBloomFilter* BasicBloomFilter::Clone() const + { + BasicBloomFilter* copy = new BasicBloomFilter(); + + copy->hasher = hasher->Clone(); + copy->bits = new BitVector(*bits); + + return copy; } BasicBloomFilter::BasicBloomFilter() @@ -135,19 +155,6 @@ size_t BasicBloomFilter::CountImpl(const Hasher::digest_vector& h) const return 1; } -CountingBloomFilter* CountingBloomFilter::Merge(const CountingBloomFilter* x, - const CountingBloomFilter* y) - { - if ( ! x->hasher->Equals(y->hasher) ) - reporter->InternalError("incompatible hashers during CountingBloomFilter merge"); - - CountingBloomFilter* result = new CountingBloomFilter(); - result->hasher = x->hasher->Clone(); - result->cells = new CounterVector(*x->cells | *y->cells); - - return result; - } - CountingBloomFilter::CountingBloomFilter() { cells = 0; @@ -160,6 +167,44 @@ CountingBloomFilter::CountingBloomFilter(const Hasher* hasher, cells = new CounterVector(width, arg_cells); } +void CountingBloomFilter::Clear() + { + cells->Clear(); + } + +bool CountingBloomFilter::Merge(const BloomFilter* other) + { + if ( typeid(*this) != typeid(*other) ) + return 0; + + const CountingBloomFilter* o = static_cast(other); + + if ( ! 
hasher->Equals(o->hasher) ) + { + reporter->InternalError("incompatible hashers in CountingBloomFilter merge"); + return false; + } + else if ( cells->Size() != o->cells->Size() ) + { + reporter->InternalError("different bitvector size in CountingBloomFilter merge"); + return false; + } + + (*cells) |= *o->cells; + + return true; + } + +CountingBloomFilter* CountingBloomFilter::Clone() const + { + CountingBloomFilter* copy = new CountingBloomFilter(); + + copy->hasher = hasher->Clone(); + copy->cells = new CounterVector(*cells); + + return copy; + } + IMPLEMENT_SERIAL(CountingBloomFilter, SER_COUNTINGBLOOMFILTER) bool CountingBloomFilter::DoSerialize(SerialInfo* info) const @@ -196,8 +241,3 @@ size_t CountingBloomFilter::CountImpl(const Hasher::digest_vector& h) const return min; } - -void CountingBloomFilter::Clear() - { - cells->Clear(); - } diff --git a/src/probabilistic/BloomFilter.h b/src/probabilistic/BloomFilter.h index 55bc76fca7..2ab5b89941 100644 --- a/src/probabilistic/BloomFilter.h +++ b/src/probabilistic/BloomFilter.h @@ -52,6 +52,22 @@ public: */ virtual void Clear() = 0; + /** + * Merges another Bloom filter into a copy of this one. + * + * @param other The other Bloom filter. + * + * @return `true` on success. + */ + virtual bool Merge(const BloomFilter* other) = 0; + + /** + * Constructs a copy of this Bloom filter. + * + * @return A copy of `*this`. + */ + virtual BloomFilter* Clone() const = 0; + /** * Serializes the Bloom filter. * @@ -154,14 +170,8 @@ public: // Overridden from BloomFilter. virtual void Clear(); - - /** - * Merges two basic Bloom filters. - * - * @return The merged Bloom filter. - */ - static BasicBloomFilter* Merge(const BasicBloomFilter* x, - const BasicBloomFilter* y); + virtual bool Merge(const BloomFilter* other); + virtual BasicBloomFilter* Clone() const; protected: DECLARE_SERIAL(BasicBloomFilter); @@ -198,14 +208,8 @@ public: // Overridden from BloomFilter. virtual void Clear(); - - /** - * Merges two counting Bloom filters. - * - * @return The merged Bloom filter. - */ - static CountingBloomFilter* Merge(const CountingBloomFilter* x, - const CountingBloomFilter* y); + virtual bool Merge(const BloomFilter* other); + virtual CountingBloomFilter* Clone() const; protected: DECLARE_SERIAL(CountingBloomFilter); From 5769c32f1eeb319e599996e05e0e63b30af34823 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Wed, 24 Jul 2013 13:18:19 +0200 Subject: [PATCH 099/118] Support emptiness check on Bloom filters. 
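
At the script level the visible effect is in bloomfilter_lookup(), which now
short-circuits on an empty filter instead of walking through the type checks.
A small illustrative sketch:

    event bro_init()
        {
        local bf = bloomfilter_basic_init(0.01, 1000);
        # Nothing has been added yet, so the filter is still untyped; the
        # lookup now simply returns 0 instead of reporting an error.
        print bloomfilter_lookup(bf, 42);
        }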
--- src/OpaqueVal.cc | 5 +++++ src/OpaqueVal.h | 1 + src/probabilistic/BitVector.cc | 8 ++++++++ src/probabilistic/BitVector.h | 6 ++++++ src/probabilistic/BloomFilter.cc | 10 ++++++++++ src/probabilistic/BloomFilter.h | 9 +++++++++ src/probabilistic/CounterVector.cc | 5 +++++ src/probabilistic/CounterVector.h | 6 ++++++ src/probabilistic/bloom-filter.bif | 3 +++ 9 files changed, 53 insertions(+) diff --git a/src/OpaqueVal.cc b/src/OpaqueVal.cc index feff4f3cc0..a42892e2b2 100644 --- a/src/OpaqueVal.cc +++ b/src/OpaqueVal.cc @@ -583,6 +583,11 @@ void BloomFilterVal::Clear() bloom_filter->Clear(); } +bool BloomFilterVal::Empty() const + { + return bloom_filter->Empty(); + } + BloomFilterVal* BloomFilterVal::Merge(const BloomFilterVal* x, const BloomFilterVal* y) { diff --git a/src/OpaqueVal.h b/src/OpaqueVal.h index 360bb69803..52c9583fc7 100644 --- a/src/OpaqueVal.h +++ b/src/OpaqueVal.h @@ -126,6 +126,7 @@ public: void Add(const Val* val); size_t Count(const Val* val) const; void Clear(); + bool Empty() const; static BloomFilterVal* Merge(const BloomFilterVal* x, const BloomFilterVal* y); diff --git a/src/probabilistic/BitVector.cc b/src/probabilistic/BitVector.cc index 98f008b24b..13cd1aa3bb 100644 --- a/src/probabilistic/BitVector.cc +++ b/src/probabilistic/BitVector.cc @@ -463,6 +463,14 @@ bool BitVector::Empty() const return bits.empty(); } +bool BitVector::AllZero() const + { + for ( size_t i = 0; i < bits.size(); ++i ) + if ( bits[i] ) + return false; + return true; + } + BitVector::size_type BitVector::FindFirst() const { return find_from(0); diff --git a/src/probabilistic/BitVector.h b/src/probabilistic/BitVector.h index 9eefe1b633..d9c55d53c6 100644 --- a/src/probabilistic/BitVector.h +++ b/src/probabilistic/BitVector.h @@ -253,6 +253,12 @@ public: */ bool Empty() const; + /** + * Checks whether all bits are 0. + * @return `true` iff all bits in all blocks are 0. + */ + bool AllZero() const; + /** * Finds the bit position of of the first 1-bit. * @return The position of the first bit that equals to one or `npos` if no diff --git a/src/probabilistic/BloomFilter.cc b/src/probabilistic/BloomFilter.cc index 132cf376ec..7f769cbf7c 100644 --- a/src/probabilistic/BloomFilter.cc +++ b/src/probabilistic/BloomFilter.cc @@ -74,6 +74,11 @@ size_t BasicBloomFilter::K(size_t cells, size_t capacity) return std::ceil(frac * std::log(2)); } +bool BasicBloomFilter::Empty() const + { + return bits->AllZero(); + } + void BasicBloomFilter::Clear() { bits->Clear(); @@ -167,6 +172,11 @@ CountingBloomFilter::CountingBloomFilter(const Hasher* hasher, cells = new CounterVector(width, arg_cells); } +bool CountingBloomFilter::Empty() const + { + return cells->AllZero(); + } + void CountingBloomFilter::Clear() { cells->Clear(); diff --git a/src/probabilistic/BloomFilter.h b/src/probabilistic/BloomFilter.h index 2ab5b89941..b6cf18672f 100644 --- a/src/probabilistic/BloomFilter.h +++ b/src/probabilistic/BloomFilter.h @@ -47,6 +47,13 @@ public: return CountImpl((*hasher)(x)); } + /** + * Checks whether the Bloom filter is empty. + * + * @return `true` if the Bloom filter contains no elements. + */ + virtual bool Empty() const = 0; + /** * Removes all elements, i.e., resets all bits in the underlying bit vector. */ @@ -169,6 +176,7 @@ public: static size_t K(size_t cells, size_t capacity); // Overridden from BloomFilter. 
+ virtual bool Empty() const; virtual void Clear(); virtual bool Merge(const BloomFilter* other); virtual BasicBloomFilter* Clone() const; @@ -207,6 +215,7 @@ public: CountingBloomFilter(const Hasher* hasher, size_t cells, size_t width); // Overridden from BloomFilter. + virtual bool Empty() const; virtual void Clear(); virtual bool Merge(const BloomFilter* other); virtual CountingBloomFilter* Clone() const; diff --git a/src/probabilistic/CounterVector.cc b/src/probabilistic/CounterVector.cc index 00fa7fb8c0..24c9ff3638 100644 --- a/src/probabilistic/CounterVector.cc +++ b/src/probabilistic/CounterVector.cc @@ -70,6 +70,11 @@ bool CounterVector::Decrement(size_type cell, count_type value) return carry; } +bool CounterVector::AllZero() const + { + return bits->AllZero(); + } + void CounterVector::Clear() { bits->Clear(); diff --git a/src/probabilistic/CounterVector.h b/src/probabilistic/CounterVector.h index 896f98ef1e..df6fc57ac2 100644 --- a/src/probabilistic/CounterVector.h +++ b/src/probabilistic/CounterVector.h @@ -77,6 +77,12 @@ public: */ count_type Count(size_type cell) const; + /** + * Checks whether all counters are 0. + * @return `true` iff all counters have the value 0. + */ + bool AllZero() const; + /** * Sets all counters to 0. */ diff --git a/src/probabilistic/bloom-filter.bif b/src/probabilistic/bloom-filter.bif index 9df168be0e..dd21688fdd 100644 --- a/src/probabilistic/bloom-filter.bif +++ b/src/probabilistic/bloom-filter.bif @@ -109,6 +109,9 @@ function bloomfilter_lookup%(bf: opaque of bloomfilter, x: any%): count %{ const BloomFilterVal* bfv = static_cast(bf); + if ( bfv->Empty() ) + return new Val(0, TYPE_COUNT); + if ( ! bfv->Type() ) reporter->Error("cannot perform lookup on untyped Bloom filter"); From d8226169b8266b554c73b2804d480d10c4a9e456 Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Wed, 24 Jul 2013 16:34:52 -0700 Subject: [PATCH 100/118] Fixing random number generation so that it returns same numbers as before. That broke a lot of tests. --- src/H3.h | 16 ++++++++++++++-- src/util.cc | 2 +- src/util.h | 2 +- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/src/H3.h b/src/H3.h index 8ea5848816..321fda924b 100644 --- a/src/H3.h +++ b/src/H3.h @@ -66,17 +66,29 @@ template class H3 { public: - H3(T seed = bro_random()) + H3() + { + Init(false, 0); + } + + H3(T seed) + { + Init(true, seed); + } + + void Init(bool have_seed, T seed) { T bit_lookup[N * CHAR_BIT]; for ( size_t bit = 0; bit < N * CHAR_BIT; bit++ ) { bit_lookup[bit] = 0; - seed = bro_prng(seed); for ( size_t i = 0; i < sizeof(T)/2; i++ ) + { + seed = have_seed ? bro_prng(seed) : bro_random(); // assume random() returns at least 16 random bits bit_lookup[bit] = (bit_lookup[bit] << 16) | (seed & 0xFFFF); + } } for ( size_t byte = 0; byte < N; byte++ ) diff --git a/src/util.cc b/src/util.cc index 6bea2eb7f1..23abbacc3f 100644 --- a/src/util.cc +++ b/src/util.cc @@ -829,7 +829,7 @@ bool have_random_seed() return bro_rand_determistic; } -long int bro_prng(long int state) +unsigned int bro_prng(unsigned int state) { // Use our own simple linear congruence PRNG to make sure we are // predictable across platforms. diff --git a/src/util.h b/src/util.h index aaad2d9403..05b3f032d0 100644 --- a/src/util.h +++ b/src/util.h @@ -175,7 +175,7 @@ extern bool have_random_seed(); // A simple linear congruence PRNG. It takes its state as argument and // returns a new random value, which can serve as state for subsequent calls. 
-long int bro_prng(long int state); +unsigned int bro_prng(unsigned int state); // Replacement for the system random(), to which is normally falls back // except when a seed has been given. In that case, the function bro_prng. From 33e6435329c9c629b47069fd48fd97139f21a2e4 Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Wed, 24 Jul 2013 16:39:22 -0700 Subject: [PATCH 101/118] Updating tests. --- doc/scripts/DocSourcesList.cmake | 1 + .../canonified_loaded_scripts.log | 5 +++-- .../canonified_loaded_scripts.log | 5 +++-- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/doc/scripts/DocSourcesList.cmake b/doc/scripts/DocSourcesList.cmake index 529b03ca83..26a88027ef 100644 --- a/doc/scripts/DocSourcesList.cmake +++ b/doc/scripts/DocSourcesList.cmake @@ -17,6 +17,7 @@ rest_target(${psd} base/init-default.bro internal) rest_target(${psd} base/init-bare.bro internal) rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/analyzer.bif.bro) +rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/bloom-filter.bif.bro) rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/bro.bif.bro) rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/const.bif.bro) rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/event.bif.bro) diff --git a/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log b/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log index b7585a1477..04316da023 100644 --- a/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log +++ b/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log @@ -3,7 +3,7 @@ #empty_field (empty) #unset_field - #path loaded_scripts -#open 2013-07-05-05-20-50 +#open 2013-07-24-23-38-28 #fields name #types string scripts/base/init-bare.bro @@ -12,6 +12,7 @@ scripts/base/init-bare.bro build/scripts/base/bif/strings.bif.bro build/scripts/base/bif/bro.bif.bro build/scripts/base/bif/reporter.bif.bro + build/scripts/base/bif/bloom-filter.bif.bro build/scripts/base/bif/event.bif.bro build/scripts/base/bif/plugins/__load__.bro build/scripts/base/bif/plugins/Bro_ARP.events.bif.bro @@ -89,4 +90,4 @@ scripts/base/init-bare.bro build/scripts/base/bif/file_analysis.bif.bro scripts/policy/misc/loaded-scripts.bro scripts/base/utils/paths.bro -#close 2013-07-05-05-20-50 +#close 2013-07-24-23-38-28 diff --git a/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log b/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log index 999fd7c841..66212643f3 100644 --- a/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log +++ b/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log @@ -3,7 +3,7 @@ #empty_field (empty) #unset_field - #path loaded_scripts -#open 2013-07-10-21-18-31 +#open 2013-07-24-23-38-33 #fields name #types string scripts/base/init-bare.bro @@ -12,6 +12,7 @@ scripts/base/init-bare.bro build/scripts/base/bif/strings.bif.bro build/scripts/base/bif/bro.bif.bro build/scripts/base/bif/reporter.bif.bro + build/scripts/base/bif/bloom-filter.bif.bro build/scripts/base/bif/event.bif.bro build/scripts/base/bif/plugins/__load__.bro build/scripts/base/bif/plugins/Bro_ARP.events.bif.bro @@ -195,4 +196,4 @@ scripts/base/init-default.bro scripts/base/protocols/tunnels/__load__.bro scripts/base/misc/find-checksum-offloading.bro scripts/policy/misc/loaded-scripts.bro -#close 2013-07-10-21-18-31 +#close 2013-07-24-23-38-33 From e482897f885e2f1039b96782d5e4bc080d74a535 Mon Sep 17 
00:00:00 2001 From: Matthias Vallentin Date: Thu, 25 Jul 2013 15:16:53 +0200 Subject: [PATCH 102/118] Add docs and use default value for hasher names. --- src/probabilistic/Hasher.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/probabilistic/Hasher.h b/src/probabilistic/Hasher.h index 62c5d58d1f..d266565284 100644 --- a/src/probabilistic/Hasher.h +++ b/src/probabilistic/Hasher.h @@ -63,7 +63,9 @@ public: size_t K() const { return k; } /** - * Returns the hasher's name. TODO: What's this? + * Returns the hasher's name. If not empty, the hasher uses this descriptor + * to seed its *k* hash functions. Otherwise the hasher mixes in the initial + * seed derived from the environment variable `$BRO_SEED`. */ const std::string& Name() const { return name; } @@ -83,7 +85,7 @@ public: protected: Hasher(size_t k, const std::string& name); - private: +private: const size_t k; std::string name; }; @@ -166,7 +168,7 @@ public: * * @param name The name of the hasher. */ - DefaultHasher(size_t k, const std::string& name); + DefaultHasher(size_t k, const std::string& name = ""); // Overridden from Hasher. virtual digest_vector Hash(const void* x, size_t n) const /* final */; @@ -190,7 +192,7 @@ public: * * @param name The name of the hasher. */ - DoubleHasher(size_t k, const std::string& name); + DoubleHasher(size_t k, const std::string& name = ""); // Overridden from Hasher. virtual digest_vector Hash(const void* x, size_t n) const /* final */; From 2fc5ca53ff8f90aa959b2bc65626b319a1dee529 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Thu, 25 Jul 2013 17:35:35 +0200 Subject: [PATCH 103/118] Make hashers serializable. There exists still a small bug that I could not find; the unit test istate/opaque.bro fails. If someone sees why, please chime in. --- src/SerialTypes.h | 6 ++ src/probabilistic/BloomFilter.cc | 19 +----- src/probabilistic/BloomFilter.h | 3 - src/probabilistic/Hasher.cc | 99 ++++++++++++++++++++++++++---- src/probabilistic/Hasher.h | 33 +++++----- src/probabilistic/bloom-filter.bif | 4 +- 6 files changed, 117 insertions(+), 47 deletions(-) diff --git a/src/SerialTypes.h b/src/SerialTypes.h index 85aed10bda..9933d005f0 100644 --- a/src/SerialTypes.h +++ b/src/SerialTypes.h @@ -52,6 +52,7 @@ SERIAL_IS(RE_MATCHER, 0x1400) SERIAL_IS(BITVECTOR, 0x1500) SERIAL_IS(COUNTERVECTOR, 0x1600) SERIAL_IS(BLOOMFILTER, 0x1700) +SERIAL_IS(HASHER, 0x1800) // These are the externally visible types. const SerialType SER_NONE = 0; @@ -206,6 +207,11 @@ SERIAL_BLOOMFILTER(BLOOMFILTER, 1) SERIAL_BLOOMFILTER(BASICBLOOMFILTER, 2) SERIAL_BLOOMFILTER(COUNTINGBLOOMFILTER, 3) +#define SERIAL_HASHER(name, val) SERIAL_CONST(name, val, HASHER) +SERIAL_HASHER(HASHER, 1) +SERIAL_HASHER(DEFAULTHASHER, 2) +SERIAL_HASHER(DOUBLEHASHER, 3) + SERIAL_CONST2(ID) SERIAL_CONST2(STATE_ACCESS) SERIAL_CONST2(CASE) diff --git a/src/probabilistic/BloomFilter.cc b/src/probabilistic/BloomFilter.cc index 7f769cbf7c..d446643ed3 100644 --- a/src/probabilistic/BloomFilter.cc +++ b/src/probabilistic/BloomFilter.cc @@ -38,28 +38,15 @@ bool BloomFilter::DoSerialize(SerialInfo* info) const { DO_SERIALIZE(SER_BLOOMFILTER, SerialObj); - if ( ! SERIALIZE(static_cast(hasher->K())) ) - return false; - - return SERIALIZE_STR(hasher->Name().c_str(), hasher->Name().size()); + return hasher->Serialize(info); } bool BloomFilter::DoUnserialize(UnserialInfo* info) { DO_UNSERIALIZE(SerialObj); - uint16 k; - if ( ! UNSERIALIZE(&k) ) - return false; - - const char* name; - if ( ! 
UNSERIALIZE_STR(&name, 0) ) - return false; - - hasher = Hasher::Create(k, name); - - delete [] name; - return true; + hasher = Hasher::Unserialize(info); + return hasher != 0; } size_t BasicBloomFilter::M(double fp, size_t capacity) diff --git a/src/probabilistic/BloomFilter.h b/src/probabilistic/BloomFilter.h index b6cf18672f..4865ae145c 100644 --- a/src/probabilistic/BloomFilter.h +++ b/src/probabilistic/BloomFilter.h @@ -13,9 +13,6 @@ class CounterVector; /** * The abstract base class for Bloom filters. - * - * At this point we won't let the user choose the hasher, but we might open - * up the interface in the future. */ class BloomFilter : public SerialObj { public: diff --git a/src/probabilistic/Hasher.cc b/src/probabilistic/Hasher.cc index f9ce7bdd6b..7db363142d 100644 --- a/src/probabilistic/Hasher.cc +++ b/src/probabilistic/Hasher.cc @@ -4,9 +4,56 @@ #include "Hasher.h" #include "digest.h" +#include "Serializer.h" using namespace probabilistic; +bool Hasher::Serialize(SerialInfo* info) const + { + return SerialObj::Serialize(info); + } + +Hasher* Hasher::Unserialize(UnserialInfo* info) + { + return reinterpret_cast(SerialObj::Unserialize(info, SER_HASHER)); + } + +bool Hasher::DoSerialize(SerialInfo* info) const + { + DO_SERIALIZE(SER_HASHER, SerialObj); + + if ( ! SERIALIZE(static_cast(k)) ) + return false; + + return SERIALIZE_STR(name.c_str(), name.size()); + } + +bool Hasher::DoUnserialize(UnserialInfo* info) + { + DO_UNSERIALIZE(SerialObj); + + uint16 serial_k; + if ( ! UNSERIALIZE(&serial_k) ) + return false; + k = serial_k; + assert(k > 0); + + const char* serial_name; + if ( ! UNSERIALIZE_STR(&serial_name, 0) ) + return false; + name = serial_name; + delete [] serial_name; + + return true; + } + +Hasher::Hasher(size_t k, const std::string& arg_name) + : k(k) + { + name = arg_name; + } + + UHF::UHF(size_t seed, const std::string& extra) : h(compute_seed(seed, extra)) { @@ -40,17 +87,6 @@ size_t UHF::compute_seed(size_t seed, const std::string& extra) return *reinterpret_cast(buf); } -Hasher* Hasher::Create(size_t k, const std::string& name) - { - return new DefaultHasher(k, name); - } - -Hasher::Hasher(size_t k, const std::string& arg_name) - : k(k) - { - name = arg_name; - } - DefaultHasher::DefaultHasher(size_t k, const std::string& name) : Hasher(k, name) { @@ -82,6 +118,27 @@ bool DefaultHasher::Equals(const Hasher* other) const return hash_functions == o->hash_functions; } +IMPLEMENT_SERIAL(DefaultHasher, SER_DEFAULTHASHER) + +bool DefaultHasher::DoSerialize(SerialInfo* info) const + { + DO_SERIALIZE(SER_DEFAULTHASHER, Hasher); + + // Nothing to do here, the base class has all we need serialized already. + return true; + } + +bool DefaultHasher::DoUnserialize(UnserialInfo* info) + { + DO_UNSERIALIZE(Hasher); + + hash_functions.clear(); + for ( size_t i = 0; i < K(); ++i ) + hash_functions.push_back(UHF(i, Name())); + + return true; + } + DoubleHasher::DoubleHasher(size_t k, const std::string& name) : Hasher(k, name), h1(1, name), h2(2, name) { @@ -112,3 +169,23 @@ bool DoubleHasher::Equals(const Hasher* other) const const DoubleHasher* o = static_cast(other); return h1 == o->h1 && h2 == o->h2; } + +IMPLEMENT_SERIAL(DoubleHasher, SER_DOUBLEHASHER) + +bool DoubleHasher::DoSerialize(SerialInfo* info) const + { + DO_SERIALIZE(SER_DOUBLEHASHER, Hasher); + + // Nothing to do here, the base class has all we need serialized already. 
+ return true; + } + +bool DoubleHasher::DoUnserialize(UnserialInfo* info) + { + DO_UNSERIALIZE(Hasher); + + h1 = UHF(1, Name()); + h2 = UHF(2, Name()); + + return true; + } diff --git a/src/probabilistic/Hasher.h b/src/probabilistic/Hasher.h index d266565284..7e6a8ba134 100644 --- a/src/probabilistic/Hasher.h +++ b/src/probabilistic/Hasher.h @@ -5,6 +5,7 @@ #include "Hash.h" #include "H3.h" +#include "SerialObj.h" namespace probabilistic { @@ -12,7 +13,7 @@ namespace probabilistic { * Abstract base class for hashers. A hasher creates a family of hash * functions to hash an element *k* times. */ -class Hasher { +class Hasher : public SerialObj { public: typedef hash_t digest; typedef std::vector digest_vector; @@ -69,24 +70,18 @@ public: */ const std::string& Name() const { return name; } - /** - * Constructs the hasher used by the implementation. This hardcodes a - * specific hashing policy. It exists only because the HashingPolicy - * class hierachy is not yet serializable. - * - * @param k The number of hash functions to apply. - * - * @param name The hasher's name. - * - * @return Returns a new hasher instance. - */ - static Hasher* Create(size_t k, const std::string& name); + bool Serialize(SerialInfo* info) const; + static Hasher* Unserialize(UnserialInfo* info); protected: + DECLARE_ABSTRACT_SERIAL(Hasher); + + Hasher() { } + Hasher(size_t k, const std::string& name); private: - const size_t k; + size_t k; std::string name; }; @@ -106,7 +101,7 @@ public: * seed to compute the seed for t to compute the seed NUL-terminated * string as additional seed. */ - UHF(size_t seed, const std::string& extra = ""); + UHF(size_t seed = 0, const std::string& extra = ""); template Hasher::digest operator()(const T& x) const @@ -175,7 +170,11 @@ public: virtual DefaultHasher* Clone() const /* final */; virtual bool Equals(const Hasher* other) const /* final */; + DECLARE_SERIAL(DefaultHasher); + private: + DefaultHasher() { } + std::vector hash_functions; }; @@ -199,7 +198,11 @@ public: virtual DoubleHasher* Clone() const /* final */; virtual bool Equals(const Hasher* other) const /* final */; + DECLARE_SERIAL(DoubleHasher); + private: + DoubleHasher() { } + UHF h1; UHF h2; }; diff --git a/src/probabilistic/bloom-filter.bif b/src/probabilistic/bloom-filter.bif index dd21688fdd..f03e3d149b 100644 --- a/src/probabilistic/bloom-filter.bif +++ b/src/probabilistic/bloom-filter.bif @@ -40,7 +40,7 @@ function bloomfilter_basic_init%(fp: double, capacity: count, size_t cells = BasicBloomFilter::M(fp, capacity); size_t optimal_k = BasicBloomFilter::K(cells, capacity); - const Hasher* h = Hasher::Create(optimal_k, name->CheckString()); + const Hasher* h = new DefaultHasher(optimal_k, name->CheckString()); return new BloomFilterVal(new BasicBloomFilter(h, cells)); %} @@ -68,7 +68,7 @@ function bloomfilter_counting_init%(k: count, cells: count, max: count, return 0; } - const Hasher* h = Hasher::Create(k, name->CheckString()); + const Hasher* h = new DefaultHasher(k, name->CheckString()); uint16 width = 1; while ( max >>= 1 ) From febb7e83957aa14fbc14d59782b33ac3690388b3 Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Thu, 25 Jul 2013 09:55:15 -0700 Subject: [PATCH 104/118] Covenience make target to update the three coverage tests that usually need tweaking when scripts get added/removed. 
--- testing/btest/Makefile | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/testing/btest/Makefile b/testing/btest/Makefile index ff63bdb601..47451fbf27 100644 --- a/testing/btest/Makefile +++ b/testing/btest/Makefile @@ -24,4 +24,11 @@ cleanup: update-doc-sources: ../../doc/scripts/genDocSourcesList.sh ../../doc/scripts/DocSourcesList.cmake +# Updates the three coverage tests that usually need tweaking when +# scripts get added/removed. +update-coverage-tests: update-doc-sources + btest -qU coverage.bare-load-baseline + btest -qU coverage.default-load-baseline + @echo "Use 'git diff' to check updates look right." + .PHONY: all btest-verbose brief btest-brief coverage cleanup From 4a7046848caf6f0b97149c91902e42b770c97b3c Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Thu, 25 Jul 2013 09:45:10 -0700 Subject: [PATCH 105/118] bif files declared with bif_target() are now automatically compiled in. No more manual includes to pull them in. (It doesn't quite work fully automatically yet for some bifs that need script-level types defined, like the input and logging frameworks. They still do a manual "@load foo.bif" in their main.bro to get the order right. It's a bit tricky to fix that and would probably need splitting main.bro into two parts; not sure that's worth it.) --- CHANGES | 10 ++++++++++ VERSION | 2 +- aux/binpac | 2 +- cmake | 2 +- scripts/base/init-bare.bro | 2 ++ src/CMakeLists.txt | 18 +++++++++++++++++- src/Func.cc | 4 ++++ src/analyzer/Manager.cc | 1 - src/file_analysis/Manager.cc | 1 - .../canonified_loaded_scripts.log | 5 +++-- .../canonified_loaded_scripts.log | 5 +++-- 11 files changed, 42 insertions(+), 10 deletions(-) diff --git a/CHANGES b/CHANGES index 7cbbc74e4f..92d16d7776 100644 --- a/CHANGES +++ b/CHANGES @@ -1,4 +1,14 @@ +2.1-826 | 2013-07-25 10:12:26 -0700 + + * bif files declared with bif_target() are now automatically + compiled in. No more manual includes to pull them in. (Robin + Sommer) + + * Covenience make target in testing/btest to update the three + coverage tests that usually need tweaking when scripts get + added/removed. (Robin Sommer) + 2.1-824 | 2013-07-22 14:25:14 -0400 * Fixed a scriptland state issue that manifested especially badly on proxies. (Seth Hall) diff --git a/VERSION b/VERSION index d35eaf1454..71d91b2ea8 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.1-824 +2.1-826 diff --git a/aux/binpac b/aux/binpac index c39bd478b9..0c91feea55 160000 --- a/aux/binpac +++ b/aux/binpac @@ -1 +1 @@ -Subproject commit c39bd478b9d0ecd05b1b83aa9d09a7887893977c +Subproject commit 0c91feea55d00d3a1787203b3a43e3f9044d66e0 diff --git a/cmake b/cmake index 0187b33a29..026639f836 160000 --- a/cmake +++ b/cmake @@ -1 +1 @@ -Subproject commit 0187b33a29d5ec824f940feff60dc5d8c2fe314f +Subproject commit 026639f8368e56742c0cb5d9fb390ea64e60ec50 diff --git a/scripts/base/init-bare.bro b/scripts/base/init-bare.bro index 60ed0d2fd1..cffa6d80f1 100644 --- a/scripts/base/init-bare.bro +++ b/scripts/base/init-bare.bro @@ -3050,3 +3050,5 @@ const snaplen = 8192 &redef; @load base/frameworks/input @load base/frameworks/analyzer @load base/frameworks/file-analysis + +@load base/bif diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index e353dd4695..4644bab80a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -6,6 +6,9 @@ include_directories(BEFORE # This collects generated bif and pac files from subdirectories. set(bro_ALL_GENERATED_OUTPUTS CACHE INTERNAL "automatically generated files" FORCE) +# This collects bif inputs that we'll load automatically. 
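As a sketch of the manual case that remains, the pattern looks like this
(the .bif name below is only an example, not taken from this change):

    # In the framework's main.bro, pull in the compiled-in BiF declarations
    # explicitly so they are loaded at the right point:
    @load base/bif/logging.bif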
+set(bro_AUTO_BIFS CACHE INTERNAL "BIFs for automatic inclusion" FORCE) + # If TRUE, use CMake's object libraries for sub-directories instead of # static libraries. This requires CMake >= 2.8.8. set(bro_HAVE_OBJECT_LIBRARIES FALSE) @@ -382,8 +385,21 @@ set(BRO_EXE bro CACHE STRING "Bro executable binary" FORCE) # Target to create all the autogenerated files. +add_custom_target(generate_outputs_stage1) +add_dependencies(generate_outputs_stage1 ${bro_ALL_GENERATED_OUTPUTS}) + +# Target to create the joint includes files that pull in the bif code. +bro_bif_create_includes(generate_outputs_stage2 ${CMAKE_CURRENT_BINARY_DIR} "${bro_AUTO_BIFS}") +add_dependencies(generate_outputs_stage2 generate_outputs_stage1) + +# Global target to trigger creation of autogenerated code. add_custom_target(generate_outputs) -add_dependencies(generate_outputs ${bro_ALL_GENERATED_OUTPUTS}) +add_dependencies(generate_outputs generate_outputs_stage2) + +# Build __load__.bro files for standard *.bif.bro. +bro_bif_create_loader(bif_loader ${CMAKE_BINARY_DIR}/scripts/base/bif) +add_dependencies(bif_loader ${bro_SUBDIRS}) +add_dependencies(bro bif_loader) # Build __load__.bro files for plugins/*.bif.bro. bro_bif_create_loader(bif_loader_plugins ${CMAKE_BINARY_DIR}/scripts/base/bif/plugins) diff --git a/src/Func.cc b/src/Func.cc index f3718fe231..7859e8d2ad 100644 --- a/src/Func.cc +++ b/src/Func.cc @@ -560,6 +560,8 @@ void builtin_error(const char* msg, BroObj* arg) #include "reporter.bif.func_def" #include "strings.bif.func_def" +#include "__all__.bif.cc" // Autogenerated for compiling in the bif_target() code. + void init_builtin_funcs() { bro_resources = internal_type("bro_resources")->AsRecordType(); @@ -574,6 +576,8 @@ void init_builtin_funcs() #include "reporter.bif.func_init" #include "strings.bif.func_init" +#include "__all__.bif.init.cc" // Autogenerated for compiling in the bif_target() code. 
+ did_builtin_init = true; } diff --git a/src/analyzer/Manager.cc b/src/analyzer/Manager.cc index 5695dec625..8b290e2341 100644 --- a/src/analyzer/Manager.cc +++ b/src/analyzer/Manager.cc @@ -103,7 +103,6 @@ void Manager::InitPreScript() void Manager::InitPostScript() { - #include "analyzer.bif.init.cc" } void Manager::DumpDebug() diff --git a/src/file_analysis/Manager.cc b/src/file_analysis/Manager.cc index ea1ed954ed..a7f7a29c18 100644 --- a/src/file_analysis/Manager.cc +++ b/src/file_analysis/Manager.cc @@ -60,7 +60,6 @@ void Manager::RegisterAnalyzerComponent(Component* component) void Manager::InitPostScript() { - #include "file_analysis.bif.init.cc" } void Manager::Terminate() diff --git a/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log b/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log index b7585a1477..724de75027 100644 --- a/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log +++ b/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log @@ -3,7 +3,7 @@ #empty_field (empty) #unset_field - #path loaded_scripts -#open 2013-07-05-05-20-50 +#open 2013-07-25-17-10-49 #fields name #types string scripts/base/init-bare.bro @@ -87,6 +87,7 @@ scripts/base/init-bare.bro scripts/base/frameworks/file-analysis/__load__.bro scripts/base/frameworks/file-analysis/main.bro build/scripts/base/bif/file_analysis.bif.bro + build/scripts/base/bif/__load__.bro scripts/policy/misc/loaded-scripts.bro scripts/base/utils/paths.bro -#close 2013-07-05-05-20-50 +#close 2013-07-25-17-10-49 diff --git a/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log b/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log index 999fd7c841..a3e89b4d60 100644 --- a/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log +++ b/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log @@ -3,7 +3,7 @@ #empty_field (empty) #unset_field - #path loaded_scripts -#open 2013-07-10-21-18-31 +#open 2013-07-25-17-10-50 #fields name #types string scripts/base/init-bare.bro @@ -87,6 +87,7 @@ scripts/base/init-bare.bro scripts/base/frameworks/file-analysis/__load__.bro scripts/base/frameworks/file-analysis/main.bro build/scripts/base/bif/file_analysis.bif.bro + build/scripts/base/bif/__load__.bro scripts/base/init-default.bro scripts/base/utils/site.bro scripts/base/utils/patterns.bro @@ -195,4 +196,4 @@ scripts/base/init-default.bro scripts/base/protocols/tunnels/__load__.bro scripts/base/misc/find-checksum-offloading.bro scripts/policy/misc/loaded-scripts.bro -#close 2013-07-10-21-18-31 +#close 2013-07-25-17-10-50 From c11bf3d9226fed28dbf2676c123cadd52bd13a68 Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Thu, 25 Jul 2013 11:28:30 -0700 Subject: [PATCH 106/118] Fixing serialization bug introduced during earlier merge. --- src/OpaqueVal.cc | 6 +++--- src/probabilistic/BitVector.cc | 6 +++--- src/probabilistic/CounterVector.cc | 6 +++--- .../canonified_loaded_scripts.log | 14 +++++++------- .../canonified_loaded_scripts.log | 14 +++++++------- 5 files changed, 23 insertions(+), 23 deletions(-) diff --git a/src/OpaqueVal.cc b/src/OpaqueVal.cc index b70cfee086..66b3c081e7 100644 --- a/src/OpaqueVal.cc +++ b/src/OpaqueVal.cc @@ -656,11 +656,11 @@ bool BloomFilterVal::DoUnserialize(UnserialInfo* info) if ( is_typed ) { - BroType* type = BroType::Unserialize(info); - if ( ! 
Typify(type) ) + BroType* t = BroType::Unserialize(info); + if ( ! Typify(t) ) return false; - Unref(type); + Unref(t); } bloom_filter = probabilistic::BloomFilter::Unserialize(info); diff --git a/src/probabilistic/BitVector.cc b/src/probabilistic/BitVector.cc index c0285eced3..6e642e62c1 100644 --- a/src/probabilistic/BitVector.cc +++ b/src/probabilistic/BitVector.cc @@ -568,11 +568,11 @@ bool BitVector::DoUnserialize(UnserialInfo* info) bits[i] = static_cast(block); } - uint64 num_bits; - if ( ! UNSERIALIZE(&num_bits) ) + uint64 n; + if ( ! UNSERIALIZE(&n) ) return false; - num_bits = static_cast(num_bits); + num_bits = static_cast(n); return true; } diff --git a/src/probabilistic/CounterVector.cc b/src/probabilistic/CounterVector.cc index 24c9ff3638..d5635fc0f2 100644 --- a/src/probabilistic/CounterVector.cc +++ b/src/probabilistic/CounterVector.cc @@ -183,11 +183,11 @@ bool CounterVector::DoUnserialize(UnserialInfo* info) if ( ! bits ) return false; - uint64 width; - if ( ! UNSERIALIZE(&width) ) + uint64 w; + if ( ! UNSERIALIZE(&w) ) return false; - width = static_cast(width); + width = static_cast(w); return true; } diff --git a/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log b/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log index 3236b39acd..5879c504e2 100644 --- a/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log +++ b/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log @@ -3,7 +3,7 @@ #empty_field (empty) #unset_field - #path loaded_scripts -#open 2013-07-25-17-17-10 +#open 2013-07-25-17-54-33 #fields name #types string scripts/base/init-bare.bro @@ -23,28 +23,28 @@ scripts/base/init-bare.bro build/scripts/base/bif/plugins/Bro_DCE_RPC.events.bif.bro build/scripts/base/bif/plugins/Bro_DHCP.events.bif.bro build/scripts/base/bif/plugins/Bro_DNS.events.bif.bro + build/scripts/base/bif/plugins/Bro_FTP.events.bif.bro + build/scripts/base/bif/plugins/Bro_FTP.functions.bif.bro build/scripts/base/bif/plugins/Bro_File.events.bif.bro build/scripts/base/bif/plugins/Bro_FileHash.events.bif.bro build/scripts/base/bif/plugins/Bro_Finger.events.bif.bro - build/scripts/base/bif/plugins/Bro_FTP.events.bif.bro - build/scripts/base/bif/plugins/Bro_FTP.functions.bif.bro - build/scripts/base/bif/plugins/Bro_Gnutella.events.bif.bro build/scripts/base/bif/plugins/Bro_GTPv1.events.bif.bro + build/scripts/base/bif/plugins/Bro_Gnutella.events.bif.bro build/scripts/base/bif/plugins/Bro_HTTP.events.bif.bro build/scripts/base/bif/plugins/Bro_HTTP.functions.bif.bro build/scripts/base/bif/plugins/Bro_ICMP.events.bif.bro + build/scripts/base/bif/plugins/Bro_IRC.events.bif.bro build/scripts/base/bif/plugins/Bro_Ident.events.bif.bro build/scripts/base/bif/plugins/Bro_InterConn.events.bif.bro - build/scripts/base/bif/plugins/Bro_IRC.events.bif.bro build/scripts/base/bif/plugins/Bro_Login.events.bif.bro build/scripts/base/bif/plugins/Bro_Login.functions.bif.bro build/scripts/base/bif/plugins/Bro_MIME.events.bif.bro build/scripts/base/bif/plugins/Bro_Modbus.events.bif.bro build/scripts/base/bif/plugins/Bro_NCP.events.bif.bro + build/scripts/base/bif/plugins/Bro_NTP.events.bif.bro build/scripts/base/bif/plugins/Bro_NetBIOS.events.bif.bro build/scripts/base/bif/plugins/Bro_NetBIOS.functions.bif.bro build/scripts/base/bif/plugins/Bro_NetFlow.events.bif.bro - build/scripts/base/bif/plugins/Bro_NTP.events.bif.bro build/scripts/base/bif/plugins/Bro_PIA.events.bif.bro 
build/scripts/base/bif/plugins/Bro_POP3.events.bif.bro build/scripts/base/bif/plugins/Bro_RPC.events.bif.bro @@ -91,4 +91,4 @@ scripts/base/init-bare.bro build/scripts/base/bif/__load__.bro scripts/policy/misc/loaded-scripts.bro scripts/base/utils/paths.bro -#close 2013-07-25-17-17-10 +#close 2013-07-25-17-54-33 diff --git a/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log b/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log index cb4ccba850..2a820f4270 100644 --- a/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log +++ b/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log @@ -3,7 +3,7 @@ #empty_field (empty) #unset_field - #path loaded_scripts -#open 2013-07-25-17-17-11 +#open 2013-07-25-17-54-33 #fields name #types string scripts/base/init-bare.bro @@ -23,28 +23,28 @@ scripts/base/init-bare.bro build/scripts/base/bif/plugins/Bro_DCE_RPC.events.bif.bro build/scripts/base/bif/plugins/Bro_DHCP.events.bif.bro build/scripts/base/bif/plugins/Bro_DNS.events.bif.bro + build/scripts/base/bif/plugins/Bro_FTP.events.bif.bro + build/scripts/base/bif/plugins/Bro_FTP.functions.bif.bro build/scripts/base/bif/plugins/Bro_File.events.bif.bro build/scripts/base/bif/plugins/Bro_FileHash.events.bif.bro build/scripts/base/bif/plugins/Bro_Finger.events.bif.bro - build/scripts/base/bif/plugins/Bro_FTP.events.bif.bro - build/scripts/base/bif/plugins/Bro_FTP.functions.bif.bro - build/scripts/base/bif/plugins/Bro_Gnutella.events.bif.bro build/scripts/base/bif/plugins/Bro_GTPv1.events.bif.bro + build/scripts/base/bif/plugins/Bro_Gnutella.events.bif.bro build/scripts/base/bif/plugins/Bro_HTTP.events.bif.bro build/scripts/base/bif/plugins/Bro_HTTP.functions.bif.bro build/scripts/base/bif/plugins/Bro_ICMP.events.bif.bro + build/scripts/base/bif/plugins/Bro_IRC.events.bif.bro build/scripts/base/bif/plugins/Bro_Ident.events.bif.bro build/scripts/base/bif/plugins/Bro_InterConn.events.bif.bro - build/scripts/base/bif/plugins/Bro_IRC.events.bif.bro build/scripts/base/bif/plugins/Bro_Login.events.bif.bro build/scripts/base/bif/plugins/Bro_Login.functions.bif.bro build/scripts/base/bif/plugins/Bro_MIME.events.bif.bro build/scripts/base/bif/plugins/Bro_Modbus.events.bif.bro build/scripts/base/bif/plugins/Bro_NCP.events.bif.bro + build/scripts/base/bif/plugins/Bro_NTP.events.bif.bro build/scripts/base/bif/plugins/Bro_NetBIOS.events.bif.bro build/scripts/base/bif/plugins/Bro_NetBIOS.functions.bif.bro build/scripts/base/bif/plugins/Bro_NetFlow.events.bif.bro - build/scripts/base/bif/plugins/Bro_NTP.events.bif.bro build/scripts/base/bif/plugins/Bro_PIA.events.bif.bro build/scripts/base/bif/plugins/Bro_POP3.events.bif.bro build/scripts/base/bif/plugins/Bro_RPC.events.bif.bro @@ -197,4 +197,4 @@ scripts/base/init-default.bro scripts/base/protocols/tunnels/__load__.bro scripts/base/misc/find-checksum-offloading.bro scripts/policy/misc/loaded-scripts.bro -#close 2013-07-25-17-17-11 +#close 2013-07-25-17-54-33 From 7dd5771384d6e45693e602efaebc18ffbabe8c47 Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Thu, 25 Jul 2013 12:02:41 -0700 Subject: [PATCH 107/118] Protection about broken traces with empty pcap headers. 
--- CHANGES | 5 +++++ VERSION | 2 +- src/PktSrc.cc | 6 ++++++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/CHANGES b/CHANGES index 3529576088..912d7d301f 100644 --- a/CHANGES +++ b/CHANGES @@ -1,4 +1,9 @@ +2.1-888 | 2013-07-25 12:02:41 -0700 + + * Protection about broken traces with empty pcap headers. (Matt + Thompson) + 2.1-887 | 2013-07-25 11:33:27 -0700 * Support for Bloom filter. (Matthias Vallentin) diff --git a/VERSION b/VERSION index 2ced22d6f4..4f0ea7a5ac 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.1-887 +2.1-888 diff --git a/src/PktSrc.cc b/src/PktSrc.cc index 105dc90d30..48b382565b 100644 --- a/src/PktSrc.cc +++ b/src/PktSrc.cc @@ -77,6 +77,12 @@ int PktSrc::ExtractNextPacket() data = last_data = pcap_next(pd, &hdr); + if ( data && (hdr.len == 0 || hdr.caplen == 0) ) + { + sessions->Weird("empty_pcap_header", &hdr, data); + return 0; + } + if ( data ) next_timestamp = hdr.ts.tv_sec + double(hdr.ts.tv_usec) / 1e6; From 8d729a378bd149206326f470fa76c1d4447e038f Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Thu, 25 Jul 2013 12:08:01 -0700 Subject: [PATCH 108/118] Updating submodule(s). [nomail] --- aux/binpac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aux/binpac b/aux/binpac index 0c91feea55..896ddedde5 160000 --- a/aux/binpac +++ b/aux/binpac @@ -1 +1 @@ -Subproject commit 0c91feea55d00d3a1787203b3a43e3f9044d66e0 +Subproject commit 896ddedde55c48ec2163577fc258b49c418abb3e From 939619889d41b3233e72e0c109301355bee25173 Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Thu, 25 Jul 2013 16:51:16 -0500 Subject: [PATCH 109/118] File analysis fixes and test updates. - Several places were just using old variable names or not loading scripts correctly after they'd been renamed/moved. - Revert/adjust a change in how HTTP file handles are generated that broke partial content responses. - Turn some libmagic builtin checks back on; seems some are actually useful (e.g. text detection seems to be a builtin). The rule going forward probably will be only to turn off a builtin if we confirm it causes issues. - Removed some tests that are redundant or not necessary anymore because the generic file analysis tests cover them. - A couple FTP tests still fail that I think need an actual solution via script changes. 
--- doc/scripts/CMakeLists.txt | 4 +- doc/scripts/DocSourcesList.cmake | 23 +- scripts/base/frameworks/files/main.bro | 8 +- scripts/base/protocols/ftp/files.bro | 1 + scripts/base/protocols/http/files.bro | 10 +- scripts/policy/protocols/http/detect-MHR.bro | 44 --- .../protocols/smtp/entities-excerpt.bro | 7 +- scripts/test-all-policy.bro | 4 +- src/util.cc | 14 + src/util.h | 17 - .../Baseline/core.tunnels.ayiya/http.log | 6 +- .../http.log | 12 +- .../core.tunnels.gtp.outer_ip_frag/http.log | 10 +- .../Baseline/core.tunnels.teredo/http.log | 16 +- .../http.log | 12 +- .../canonified_loaded_scripts.log | 10 +- .../istate.events-ssl/receiver.http.log | 10 +- .../istate.events-ssl/sender.http.log | 10 +- .../Baseline/istate.events/receiver.http.log | 10 +- .../Baseline/istate.events/sender.http.log | 10 +- .../out | 1 + .../get.out | 1 + .../bro..stdout | 2 + .../get.out | 1 + .../out | 2 +- .../get-gzip.out | 1 + .../get.out | 1 + .../out | 4 + .../a.out | 1 + .../b.out | 2 + .../c.out | 1 + .../out | 5 + .../out | 2 + .../file_analysis.log | 10 - .../files.log | 10 + .../out | 3 + .../http.log | 10 +- .../manager-1.notice.log | 10 +- .../manager-1.notice.log | 10 +- .../notice.log | 10 +- .../conn.log | 14 - .../extractions | 22 -- .../ftp.log | 21 -- .../http.log | 10 +- .../http-item.dat | 304 ------------------ .../http.log | 10 - .../http.log | 100 +++--- .../http.log | 14 - .../http.log | 18 +- .../http.log | 10 +- .../scripts.base.protocols.irc.basic/irc.log | 6 +- .../irc-dcc-item.dat | Bin 42208 -> 0 bytes .../irc.log | 13 - .../smtp.log | 10 +- .../extractions | 277 ---------------- .../filecount | 1 - .../smtp_entities.log | 12 - .../notice.log | 12 +- testing/btest/istate/events-ssl.bro | 21 +- testing/btest/istate/events.bro | 15 +- .../file-analysis/bifs/remove_action.bro | 4 +- .../base/frameworks/file-analysis/irc.bro | 16 +- .../base/frameworks/file-analysis/logging.bro | 2 +- .../base/protocols/ftp/ftp-extract.bro | 10 - .../protocols/http/http-extract-files.bro | 6 - .../base/protocols/http/http-mime-and-md5.bro | 6 - .../base/protocols/http/multipart-extract.bro | 8 +- .../base/protocols/irc/dcc-extract.test | 11 - .../base/protocols/smtp/mime-extract.test | 11 - testing/external/subdir-btest.cfg | 2 +- testing/scripts/file-analysis-test.bro | 4 + 71 files changed, 293 insertions(+), 1002 deletions(-) delete mode 100644 scripts/policy/protocols/http/detect-MHR.bro delete mode 100644 testing/btest/Baseline/scripts.base.frameworks.file-analysis.logging/file_analysis.log create mode 100644 testing/btest/Baseline/scripts.base.frameworks.file-analysis.logging/files.log delete mode 100644 testing/btest/Baseline/scripts.base.protocols.ftp.ftp-extract/conn.log delete mode 100644 testing/btest/Baseline/scripts.base.protocols.ftp.ftp-extract/extractions delete mode 100644 testing/btest/Baseline/scripts.base.protocols.ftp.ftp-extract/ftp.log delete mode 100644 testing/btest/Baseline/scripts.base.protocols.http.http-extract-files/http-item.dat delete mode 100644 testing/btest/Baseline/scripts.base.protocols.http.http-extract-files/http.log delete mode 100644 testing/btest/Baseline/scripts.base.protocols.http.http-mime-and-md5/http.log delete mode 100644 testing/btest/Baseline/scripts.base.protocols.irc.dcc-extract/irc-dcc-item.dat delete mode 100644 testing/btest/Baseline/scripts.base.protocols.irc.dcc-extract/irc.log delete mode 100644 testing/btest/Baseline/scripts.base.protocols.smtp.mime-extract/extractions delete mode 100644 
testing/btest/Baseline/scripts.base.protocols.smtp.mime-extract/filecount delete mode 100644 testing/btest/Baseline/scripts.base.protocols.smtp.mime-extract/smtp_entities.log delete mode 100644 testing/btest/scripts/base/protocols/ftp/ftp-extract.bro delete mode 100644 testing/btest/scripts/base/protocols/http/http-extract-files.bro delete mode 100644 testing/btest/scripts/base/protocols/http/http-mime-and-md5.bro delete mode 100644 testing/btest/scripts/base/protocols/irc/dcc-extract.test delete mode 100644 testing/btest/scripts/base/protocols/smtp/mime-extract.test diff --git a/doc/scripts/CMakeLists.txt b/doc/scripts/CMakeLists.txt index ddb09bb29c..e7e39d0b3f 100644 --- a/doc/scripts/CMakeLists.txt +++ b/doc/scripts/CMakeLists.txt @@ -99,7 +99,7 @@ macro(REST_TARGET srcDir broInput) COMMAND "${CMAKE_COMMAND}" ARGS -E remove_directory .state # generate the reST documentation using bro - COMMAND BROPATH=${BROPATH}:${srcDir} BROMAGIC=${CMAKE_SOURCE_DIR}/magic ${CMAKE_BINARY_DIR}/src/bro + COMMAND BROPATH=${BROPATH}:${srcDir} BROMAGIC=${CMAKE_SOURCE_DIR}/magic/database ${CMAKE_BINARY_DIR}/src/bro ARGS -b -Z ${broInput} || (rm -rf .state *.log *.rst && exit 1) # move generated doc into a new directory tree that # defines the final structure of documents @@ -130,7 +130,7 @@ add_custom_command(OUTPUT proto-analyzers.rst COMMAND "${CMAKE_COMMAND}" ARGS -E remove_directory .state # generate the reST documentation using bro - COMMAND BROPATH=${BROPATH}:${srcDir} BROMAGIC=${CMAKE_SOURCE_DIR}/magic ${CMAKE_BINARY_DIR}/src/bro + COMMAND BROPATH=${BROPATH}:${srcDir} BROMAGIC=${CMAKE_SOURCE_DIR}/magic/database ${CMAKE_BINARY_DIR}/src/bro ARGS -b -Z base/init-bare.bro || (rm -rf .state *.log *.rst && exit 1) # move generated doc into a new directory tree that # defines the final structure of documents diff --git a/doc/scripts/DocSourcesList.cmake b/doc/scripts/DocSourcesList.cmake index 529b03ca83..b2c932d117 100644 --- a/doc/scripts/DocSourcesList.cmake +++ b/doc/scripts/DocSourcesList.cmake @@ -73,6 +73,8 @@ rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_ZIP.events.bif.bro) rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/reporter.bif.bro) rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/strings.bif.bro) rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/types.bif.bro) +rest_target(${psd} base/files/extract/main.bro) +rest_target(${psd} base/files/hash/main.bro) rest_target(${psd} base/frameworks/analyzer/main.bro) rest_target(${psd} base/frameworks/cluster/main.bro) rest_target(${psd} base/frameworks/cluster/nodes/manager.bro) @@ -82,7 +84,7 @@ rest_target(${psd} base/frameworks/cluster/setup-connections.bro) rest_target(${psd} base/frameworks/communication/main.bro) rest_target(${psd} base/frameworks/control/main.bro) rest_target(${psd} base/frameworks/dpd/main.bro) -rest_target(${psd} base/frameworks/file-analysis/main.bro) +rest_target(${psd} base/frameworks/files/main.bro) rest_target(${psd} base/frameworks/input/main.bro) rest_target(${psd} base/frameworks/input/readers/ascii.bro) rest_target(${psd} base/frameworks/input/readers/benchmark.bro) @@ -136,25 +138,22 @@ rest_target(${psd} base/protocols/conn/main.bro) rest_target(${psd} base/protocols/conn/polling.bro) rest_target(${psd} base/protocols/dns/consts.bro) rest_target(${psd} base/protocols/dns/main.bro) -rest_target(${psd} base/protocols/ftp/file-analysis.bro) -rest_target(${psd} base/protocols/ftp/file-extract.bro) +rest_target(${psd} base/protocols/ftp/files.bro) rest_target(${psd} base/protocols/ftp/gridftp.bro) 
rest_target(${psd} base/protocols/ftp/main.bro) rest_target(${psd} base/protocols/ftp/utils-commands.bro) -rest_target(${psd} base/protocols/http/file-analysis.bro) -rest_target(${psd} base/protocols/http/file-extract.bro) -rest_target(${psd} base/protocols/http/file-hash.bro) -rest_target(${psd} base/protocols/http/file-ident.bro) +rest_target(${psd} base/protocols/ftp/utils.bro) +rest_target(${psd} base/protocols/http/entities.bro) +rest_target(${psd} base/protocols/http/files.bro) rest_target(${psd} base/protocols/http/main.bro) rest_target(${psd} base/protocols/http/utils.bro) rest_target(${psd} base/protocols/irc/dcc-send.bro) -rest_target(${psd} base/protocols/irc/file-analysis.bro) +rest_target(${psd} base/protocols/irc/files.bro) rest_target(${psd} base/protocols/irc/main.bro) rest_target(${psd} base/protocols/modbus/consts.bro) rest_target(${psd} base/protocols/modbus/main.bro) -rest_target(${psd} base/protocols/smtp/entities-excerpt.bro) rest_target(${psd} base/protocols/smtp/entities.bro) -rest_target(${psd} base/protocols/smtp/file-analysis.bro) +rest_target(${psd} base/protocols/smtp/files.bro) rest_target(${psd} base/protocols/smtp/main.bro) rest_target(${psd} base/protocols/socks/consts.bro) rest_target(${psd} base/protocols/socks/main.bro) @@ -182,6 +181,8 @@ rest_target(${psd} policy/frameworks/control/controllee.bro) rest_target(${psd} policy/frameworks/control/controller.bro) rest_target(${psd} policy/frameworks/dpd/detect-protocols.bro) rest_target(${psd} policy/frameworks/dpd/packet-segment-logging.bro) +rest_target(${psd} policy/frameworks/files/detect-MHR.bro) +rest_target(${psd} policy/frameworks/files/hash-all-files.bro) rest_target(${psd} policy/frameworks/intel/conn-established.bro) rest_target(${psd} policy/frameworks/intel/dns.bro) rest_target(${psd} policy/frameworks/intel/http-host-header.bro) @@ -214,7 +215,6 @@ rest_target(${psd} policy/protocols/dns/detect-external-names.bro) rest_target(${psd} policy/protocols/ftp/detect-bruteforcing.bro) rest_target(${psd} policy/protocols/ftp/detect.bro) rest_target(${psd} policy/protocols/ftp/software.bro) -rest_target(${psd} policy/protocols/http/detect-MHR.bro) rest_target(${psd} policy/protocols/http/detect-sqli.bro) rest_target(${psd} policy/protocols/http/detect-webapps.bro) rest_target(${psd} policy/protocols/http/header-names.bro) @@ -226,6 +226,7 @@ rest_target(${psd} policy/protocols/modbus/known-masters-slaves.bro) rest_target(${psd} policy/protocols/modbus/track-memmap.bro) rest_target(${psd} policy/protocols/smtp/blocklists.bro) rest_target(${psd} policy/protocols/smtp/detect-suspicious-orig.bro) +rest_target(${psd} policy/protocols/smtp/entities-excerpt.bro) rest_target(${psd} policy/protocols/smtp/software.bro) rest_target(${psd} policy/protocols/ssh/detect-bruteforcing.bro) rest_target(${psd} policy/protocols/ssh/geo-data.bro) diff --git a/scripts/base/frameworks/files/main.bro b/scripts/base/frameworks/files/main.bro index cc92932bbf..d0c381545b 100644 --- a/scripts/base/frameworks/files/main.bro +++ b/scripts/base/frameworks/files/main.bro @@ -139,7 +139,9 @@ export { ## ## f: the file. ## - ## args: the analyzer type to add along with any arguments it takes. + ## tag: the analyzer type. + ## + ## args: any parameters the analyzer takes. ## ## Returns: true if the analyzer will be added, or false if analysis ## for the *id* isn't currently active or the *args* @@ -156,7 +158,9 @@ export { ## ## Returns: true if the analyzer will be removed, or false if analysis ## for the *id* isn't currently active. 
- global remove_analyzer: function(f: fa_file, tag: Files::Tag, args: AnalyzerArgs): bool; + global remove_analyzer: function(f: fa_file, + tag: Files::Tag, + args: AnalyzerArgs &default=AnalyzerArgs()): bool; ## Stops/ignores any further analysis of a given file. ## diff --git a/scripts/base/protocols/ftp/files.bro b/scripts/base/protocols/ftp/files.bro index 1d7b7670f4..9ed17ab2a4 100644 --- a/scripts/base/protocols/ftp/files.bro +++ b/scripts/base/protocols/ftp/files.bro @@ -1,4 +1,5 @@ @load ./main +@load ./utils @load base/utils/conn-ids @load base/frameworks/files diff --git a/scripts/base/protocols/http/files.bro b/scripts/base/protocols/http/files.bro index fd07dc096a..14dbb12989 100644 --- a/scripts/base/protocols/http/files.bro +++ b/scripts/base/protocols/http/files.bro @@ -1,6 +1,7 @@ @load ./main @load ./entities @load ./utils +@load base/utils/conn-ids @load base/frameworks/files module HTTP; @@ -18,13 +19,16 @@ function get_file_handle(c: connection, is_orig: bool): string if ( ! c?$http ) return ""; - local mime_depth = is_orig ? c$http$orig_mime_depth : c$http$resp_mime_depth; - if ( c$http$range_request ) + if ( c$http$range_request && ! is_orig ) { - return cat(Analyzer::ANALYZER_HTTP, is_orig, c$id$orig_h, mime_depth, build_url(c$http)); + # Any multipart responses from the server are pieces of same file + # that correspond to range requests, so don't use mime depth to + # identify the file. + return cat(Analyzer::ANALYZER_HTTP, is_orig, c$id$orig_h, build_url(c$http)); } else { + local mime_depth = is_orig ? c$http$orig_mime_depth : c$http$resp_mime_depth; return cat(Analyzer::ANALYZER_HTTP, c$start_time, is_orig, c$http$trans_depth, mime_depth, id_string(c$id)); } diff --git a/scripts/policy/protocols/http/detect-MHR.bro b/scripts/policy/protocols/http/detect-MHR.bro deleted file mode 100644 index 0594276c93..0000000000 --- a/scripts/policy/protocols/http/detect-MHR.bro +++ /dev/null @@ -1,44 +0,0 @@ -##! Detect file downloads over HTTP that have MD5 sums matching files in Team -##! Cymru's Malware Hash Registry (http://www.team-cymru.org/Services/MHR/). -##! By default, not all file transfers will have MD5 sums calculated. Read the -##! documentation for the :doc:base/protocols/http/file-hash.bro script to see -##! how to configure which transfers will have hashes calculated. - -@load base/frameworks/notice -@load base/protocols/http - -module HTTP; - -export { - redef enum Notice::Type += { - ## The MD5 sum of a file transferred over HTTP matched in the - ## malware hash registry. - Malware_Hash_Registry_Match - }; - - ## The malware hash registry runs each malware sample through several A/V engines. - ## Team Cymru returns a percentage to indicate how many A/V engines flagged the - ## sample as malicious. This threshold allows you to require a minimum detection - ## rate (default: 50%). 
- const MHR_threshold = 50 &redef; -} - -event log_http(rec: HTTP::Info) - { - if ( rec?$md5 ) - { - local hash_domain = fmt("%s.malware.hash.cymru.com", rec$md5); - when ( local MHR_result = lookup_hostname_txt(hash_domain) ) - { - # Data is returned as " " - local MHR_answer = split1(MHR_result, / /); - if ( |MHR_answer| == 2 && to_count(MHR_answer[2]) >= MHR_threshold ) - { - local url = HTTP::build_url_http(rec); - local message = fmt("%s %s %s", rec$id$orig_h, rec$md5, url); - NOTICE([$note=Malware_Hash_Registry_Match, - $msg=message, $id=rec$id]); - } - } - } - } diff --git a/scripts/policy/protocols/smtp/entities-excerpt.bro b/scripts/policy/protocols/smtp/entities-excerpt.bro index 1ecd100571..423fae1ada 100644 --- a/scripts/policy/protocols/smtp/entities-excerpt.bro +++ b/scripts/policy/protocols/smtp/entities-excerpt.bro @@ -1,12 +1,12 @@ ##! This script is for optionally adding a body excerpt to the SMTP ##! entities log. -@load ./entities +@load base/protocols/smtp/entities module SMTP; export { - redef record SMTP::EntityInfo += { + redef record SMTP::Entity+= { ## The entity body excerpt. excerpt: string &log &default=""; }; @@ -31,7 +31,6 @@ event file_new(f: fa_file) &priority=5 if ( ! c?$smtp ) next; if ( default_entity_excerpt_len > 0 ) - c$smtp$current_entity$excerpt = - f$bof_buffer[0:default_entity_excerpt_len]; + c$smtp$entity$excerpt = f$bof_buffer[0:default_entity_excerpt_len]; } } diff --git a/scripts/test-all-policy.bro b/scripts/test-all-policy.bro index 1fd34d6f2f..2164343d37 100644 --- a/scripts/test-all-policy.bro +++ b/scripts/test-all-policy.bro @@ -14,6 +14,8 @@ # @load frameworks/control/controller.bro @load frameworks/dpd/detect-protocols.bro @load frameworks/dpd/packet-segment-logging.bro +@load frameworks/files/detect-MHR.bro +@load frameworks/files/hash-all-files.bro @load frameworks/intel/__load__.bro @load frameworks/intel/conn-established.bro @load frameworks/intel/dns.bro @@ -50,7 +52,6 @@ @load protocols/ftp/detect-bruteforcing.bro @load protocols/ftp/detect.bro @load protocols/ftp/software.bro -@load protocols/http/detect-MHR.bro @load protocols/http/detect-sqli.bro @load protocols/http/detect-webapps.bro @load protocols/http/header-names.bro @@ -62,6 +63,7 @@ @load protocols/modbus/track-memmap.bro @load protocols/smtp/blocklists.bro @load protocols/smtp/detect-suspicious-orig.bro +@load protocols/smtp/entities-excerpt.bro @load protocols/smtp/software.bro @load protocols/ssh/detect-bruteforcing.bro @load protocols/ssh/geo-data.bro diff --git a/src/util.cc b/src/util.cc index 5a63be22cb..0651925898 100644 --- a/src/util.cc +++ b/src/util.cc @@ -1573,6 +1573,20 @@ void operator delete[](void* v) #endif +// Being selective of which components of MAGIC_NO_CHECK_BUILTIN are actually +// known to be problematic, but keeping rest of libmagic's builtin checks. +#define DISABLE_LIBMAGIC_BUILTIN_CHECKS ( \ +/* MAGIC_NO_CHECK_COMPRESS | */ \ +/* MAGIC_NO_CHECK_TAR | */ \ +/* MAGIC_NO_CHECK_SOFT | */ \ +/* MAGIC_NO_CHECK_APPTYPE | */ \ +/* MAGIC_NO_CHECK_ELF | */ \ +/* MAGIC_NO_CHECK_TEXT | */ \ + MAGIC_NO_CHECK_CDF | \ + MAGIC_NO_CHECK_TOKENS \ +/* MAGIC_NO_CHECK_ENCODING */ \ +) + void bro_init_magic(magic_t* cookie_ptr, int flags) { if ( ! 
cookie_ptr || *cookie_ptr ) diff --git a/src/util.h b/src/util.h index 91ed8f2888..cafa63b7e8 100644 --- a/src/util.h +++ b/src/util.h @@ -377,23 +377,6 @@ struct CompareString } }; -// Older versions of libmagic may not define the MAGIC_NO_CHECK_BUILTIN -// convenience macro and other newer versions seem to have a typo that makes -// it unusable, so just make a different one now with all known flags for -// builtin libmagic components that should be disabled so that Bro only -// uses the custom magic database shipped with it. -#define DISABLE_LIBMAGIC_BUILTIN_CHECKS ( \ - MAGIC_NO_CHECK_COMPRESS | \ - MAGIC_NO_CHECK_TAR | \ -/* MAGIC_NO_CHECK_SOFT | */ \ - MAGIC_NO_CHECK_APPTYPE | \ - MAGIC_NO_CHECK_ELF | \ - MAGIC_NO_CHECK_TEXT | \ - MAGIC_NO_CHECK_CDF | \ - MAGIC_NO_CHECK_TOKENS | \ - MAGIC_NO_CHECK_ENCODING \ -) - extern magic_t magic_desc_cookie; extern magic_t magic_mime_cookie; diff --git a/testing/btest/Baseline/core.tunnels.ayiya/http.log b/testing/btest/Baseline/core.tunnels.ayiya/http.log index 04692a3547..cc0cf32148 100644 --- a/testing/btest/Baseline/core.tunnels.ayiya/http.log +++ b/testing/btest/Baseline/core.tunnels.ayiya/http.log @@ -3,10 +3,10 @@ #empty_field (empty) #unset_field - #path http -#open 2013-07-23-05-12-58 +#open 2013-07-25-21-12-29 #fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied orig_fuids orig_mime_types resp_fuids resp_mime_types #types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] vector[string] vector[string] vector[string] vector[string] -1257655301.652206 5OKnoww6xl4 2001:4978:f:4c::2 53382 2001:4860:b002::68 80 1 GET ipv6.google.com / - Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en; rv:1.9.0.15pre) Gecko/2009091516 Camino/2.0b4 (like Firefox/3.0.15pre) 0 10102 200 OK - - - (empty) - - - - - meGKu6goEyd application/octet-stream +1257655301.652206 5OKnoww6xl4 2001:4978:f:4c::2 53382 2001:4860:b002::68 80 1 GET ipv6.google.com / - Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en; rv:1.9.0.15pre) Gecko/2009091516 Camino/2.0b4 (like Firefox/3.0.15pre) 0 10102 200 OK - - - (empty) - - - - - meGKu6goEyd text/html 1257655302.514424 5OKnoww6xl4 2001:4978:f:4c::2 53382 2001:4860:b002::68 80 2 GET ipv6.google.com /csi?v=3&s=webhp&action=&tran=undefined&e=17259,19771,21517,21766,21887,22212&ei=BUz2Su7PMJTglQfz3NzCAw&rt=prt.77,xjs.565,ol.645 http://ipv6.google.com/ Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en; rv:1.9.0.15pre) Gecko/2009091516 Camino/2.0b4 (like Firefox/3.0.15pre) 0 0 204 No Content - - - (empty) - - - - - - - 1257655303.603569 5OKnoww6xl4 2001:4978:f:4c::2 53382 2001:4860:b002::68 80 3 GET ipv6.google.com /gen_204?atyp=i&ct=fade&cad=1254&ei=BUz2Su7PMJTglQfz3NzCAw&zx=1257655303600 http://ipv6.google.com/ Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en; rv:1.9.0.15pre) Gecko/2009091516 Camino/2.0b4 (like Firefox/3.0.15pre) 0 0 204 No Content - - - (empty) - - - - - - - -#close 2013-07-23-05-12-58 +#close 2013-07-25-21-12-29 diff --git a/testing/btest/Baseline/core.tunnels.gtp.different_dl_and_ul/http.log b/testing/btest/Baseline/core.tunnels.gtp.different_dl_and_ul/http.log index e88be88763..8f9ac07c96 100644 --- a/testing/btest/Baseline/core.tunnels.gtp.different_dl_and_ul/http.log +++ 
b/testing/btest/Baseline/core.tunnels.gtp.different_dl_and_ul/http.log @@ -3,9 +3,9 @@ #empty_field (empty) #unset_field - #path http -#open 2013-05-21-21-11-21 -#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied mime_type md5 extracted_request_files extracted_response_files -#types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] string string vector[string] vector[string] -1333458850.340368 arKYeMETxOg 10.131.17.170 51803 173.199.115.168 80 1 GET cdn.epicgameads.com /ads/flash/728x90_nx8com.swf?clickTAG=http://www.epicgameads.com/ads/bannerclickPage.php?id=e3ubwU6IF&pd=1&adid=0&icpc=1&axid=0&uctt=1&channel=4&cac=1&t=728x90&cb=1333458879 http://www.epicgameads.com/ads/banneriframe.php?id=e3ubwU6IF&t=728x90&channel=4&cb=1333458905296 Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0) 0 31461 200 OK - - - (empty) - - - application/x-shockwave-flash - - - -1333458850.399501 arKYeMETxOg 10.131.17.170 51803 173.199.115.168 80 2 GET cdn.epicgameads.com /ads/flash/728x90_nx8com.swf?clickTAG=http://www.epicgameads.com/ads/bannerclickPage.php?id=e3ubwU6IF&pd=1&adid=0&icpc=1&axid=0&uctt=1&channel=0&cac=1&t=728x90&cb=1333458881 http://www.epicgameads.com/ads/banneriframe.php?id=e3ubwU6IF&t=728x90&cb=1333458920207 Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0) 0 31461 200 OK - - - (empty) - - - application/x-shockwave-flash - - - -#close 2013-05-21-21-11-21 +#open 2013-07-25-16-23-41 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied orig_fuids orig_mime_types resp_fuids resp_mime_types +#types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] vector[string] vector[string] vector[string] vector[string] +1333458850.340368 arKYeMETxOg 10.131.17.170 51803 173.199.115.168 80 1 GET cdn.epicgameads.com /ads/flash/728x90_nx8com.swf?clickTAG=http://www.epicgameads.com/ads/bannerclickPage.php?id=e3ubwU6IF&pd=1&adid=0&icpc=1&axid=0&uctt=1&channel=4&cac=1&t=728x90&cb=1333458879 http://www.epicgameads.com/ads/banneriframe.php?id=e3ubwU6IF&t=728x90&channel=4&cb=1333458905296 Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0) 0 31461 200 OK - - - (empty) - - - - - 6jqjOyeITn5 application/x-shockwave-flash +1333458850.399501 arKYeMETxOg 10.131.17.170 51803 173.199.115.168 80 2 GET cdn.epicgameads.com /ads/flash/728x90_nx8com.swf?clickTAG=http://www.epicgameads.com/ads/bannerclickPage.php?id=e3ubwU6IF&pd=1&adid=0&icpc=1&axid=0&uctt=1&channel=0&cac=1&t=728x90&cb=1333458881 http://www.epicgameads.com/ads/banneriframe.php?id=e3ubwU6IF&t=728x90&cb=1333458920207 Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0) 0 31461 200 OK - - - (empty) - - - - - A0xot7xPc22 application/x-shockwave-flash +#close 2013-07-25-16-23-41 diff --git a/testing/btest/Baseline/core.tunnels.gtp.outer_ip_frag/http.log b/testing/btest/Baseline/core.tunnels.gtp.outer_ip_frag/http.log index 8f2893caa7..45b88b7813 100644 --- a/testing/btest/Baseline/core.tunnels.gtp.outer_ip_frag/http.log +++ 
b/testing/btest/Baseline/core.tunnels.gtp.outer_ip_frag/http.log @@ -3,8 +3,8 @@ #empty_field (empty) #unset_field - #path http -#open 2013-05-21-21-11-22 -#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied mime_type md5 extracted_request_files extracted_response_files -#types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] string string vector[string] vector[string] -1333458850.375568 arKYeMETxOg 10.131.47.185 1923 79.101.110.141 80 1 GET o-o.preferred.telekomrs-beg1.v2.lscache8.c.youtube.com /videoplayback?upn=MTU2MDY5NzQ5OTM0NTI3NDY4NDc&sparams=algorithm,burst,cp,factor,id,ip,ipbits,itag,source,upn,expire&fexp=912300,907210&algorithm=throttle-factor&itag=34&ip=212.0.0.0&burst=40&sver=3&signature=832FB1042E20780CFCA77A4DB5EA64AC593E8627.D1166C7E8365732E52DAFD68076DAE0146E0AE01&source=youtube&expire=1333484980&key=yt1&ipbits=8&factor=1.25&cp=U0hSSFRTUl9NSkNOMl9MTVZKOjh5eEN2SG8tZF84&id=ebf1e932d4bd1286&cm2=1 http://s.ytimg.com/yt/swfbin/watch_as3-vflqrJwOA.swf Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko; X-SBLSP) Chrome/17.0.963.83 Safari/535.11 0 56320 206 Partial Content - - - (empty) - - - application/octet-stream - - - -#close 2013-05-21-21-11-22 +#open 2013-07-25-21-12-32 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied orig_fuids orig_mime_types resp_fuids resp_mime_types +#types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] vector[string] vector[string] vector[string] vector[string] +1333458850.375568 arKYeMETxOg 10.131.47.185 1923 79.101.110.141 80 1 GET o-o.preferred.telekomrs-beg1.v2.lscache8.c.youtube.com /videoplayback?upn=MTU2MDY5NzQ5OTM0NTI3NDY4NDc&sparams=algorithm,burst,cp,factor,id,ip,ipbits,itag,source,upn,expire&fexp=912300,907210&algorithm=throttle-factor&itag=34&ip=212.0.0.0&burst=40&sver=3&signature=832FB1042E20780CFCA77A4DB5EA64AC593E8627.D1166C7E8365732E52DAFD68076DAE0146E0AE01&source=youtube&expire=1333484980&key=yt1&ipbits=8&factor=1.25&cp=U0hSSFRTUl9NSkNOMl9MTVZKOjh5eEN2SG8tZF84&id=ebf1e932d4bd1286&cm2=1 http://s.ytimg.com/yt/swfbin/watch_as3-vflqrJwOA.swf Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko; X-SBLSP) Chrome/17.0.963.83 Safari/535.11 0 56320 206 Partial Content - - - (empty) - - - - - oypNlaRdgs7 application/octet-stream +#close 2013-07-25-21-12-32 diff --git a/testing/btest/Baseline/core.tunnels.teredo/http.log b/testing/btest/Baseline/core.tunnels.teredo/http.log index 4e3cdfd61d..1ecf0884e2 100644 --- a/testing/btest/Baseline/core.tunnels.teredo/http.log +++ b/testing/btest/Baseline/core.tunnels.teredo/http.log @@ -3,11 +3,11 @@ #empty_field (empty) #unset_field - #path http -#open 2013-05-21-21-11-21 -#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied mime_type md5 extracted_request_files extracted_response_files -#types time string addr port addr port count string string string string 
string count count count string count string string table[enum] string string table[string] string string vector[string] vector[string] -1210953057.917183 3PKsZ2Uye21 192.168.2.16 1578 75.126.203.78 80 1 POST download913.avast.com /cgi-bin/iavs4stats.cgi - Syncer/4.80 (av_pro-1169;f) 589 0 204 - - - (empty) - - - text/plain - - - -1210953061.585996 70MGiRM1Qf4 2001:0:4137:9e50:8000:f12a:b9c8:2815 1286 2001:4860:0:2001::68 80 1 GET ipv6.google.com / - Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9b5) Gecko/2008032620 Firefox/3.0b5 0 6640 200 OK - - - (empty) - - - text/html - - - -1210953073.381474 70MGiRM1Qf4 2001:0:4137:9e50:8000:f12a:b9c8:2815 1286 2001:4860:0:2001::68 80 2 GET ipv6.google.com /search?hl=en&q=Wireshark+!&btnG=Google+Search http://ipv6.google.com/ Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9b5) Gecko/2008032620 Firefox/3.0b5 0 25119 200 OK - - - (empty) - - - text/html - - - -1210953074.674817 c4Zw9TmAE05 192.168.2.16 1580 67.228.110.120 80 1 GET www.wireshark.org / http://ipv6.google.com/search?hl=en&q=Wireshark+%21&btnG=Google+Search Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9b5) Gecko/2008032620 Firefox/3.0b5 0 11845 200 OK - - - (empty) - - - application/xml - - - -#close 2013-05-21-21-11-21 +#open 2013-07-25-16-23-17 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied orig_fuids orig_mime_types resp_fuids resp_mime_types +#types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] vector[string] vector[string] vector[string] vector[string] +1210953057.917183 3PKsZ2Uye21 192.168.2.16 1578 75.126.203.78 80 1 POST download913.avast.com /cgi-bin/iavs4stats.cgi - Syncer/4.80 (av_pro-1169;f) 589 0 204 - - - (empty) - - - tZX578lAmo3 text/plain - - +1210953061.585996 70MGiRM1Qf4 2001:0:4137:9e50:8000:f12a:b9c8:2815 1286 2001:4860:0:2001::68 80 1 GET ipv6.google.com / - Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9b5) Gecko/2008032620 Firefox/3.0b5 0 6640 200 OK - - - (empty) - - - - - nkfWSsPnjX7 text/html +1210953073.381474 70MGiRM1Qf4 2001:0:4137:9e50:8000:f12a:b9c8:2815 1286 2001:4860:0:2001::68 80 2 GET ipv6.google.com /search?hl=en&q=Wireshark+!&btnG=Google+Search http://ipv6.google.com/ Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9b5) Gecko/2008032620 Firefox/3.0b5 0 25119 200 OK - - - (empty) - - - - - fk5lVax7K37 text/html +1210953074.674817 c4Zw9TmAE05 192.168.2.16 1580 67.228.110.120 80 1 GET www.wireshark.org / http://ipv6.google.com/search?hl=en&q=Wireshark+%21&btnG=Google+Search Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9b5) Gecko/2008032620 Firefox/3.0b5 0 11845 200 OK - - - (empty) - - - - - 6wF1NFmBUza application/xml +#close 2013-07-25-16-23-17 diff --git a/testing/btest/Baseline/core.tunnels.teredo_bubble_with_payload/http.log b/testing/btest/Baseline/core.tunnels.teredo_bubble_with_payload/http.log index 65ec33186e..0c8c448e30 100644 --- a/testing/btest/Baseline/core.tunnels.teredo_bubble_with_payload/http.log +++ b/testing/btest/Baseline/core.tunnels.teredo_bubble_with_payload/http.log @@ -3,9 +3,9 @@ #empty_field (empty) #unset_field - #path http -#open 2013-05-21-21-11-22 -#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code 
status_msg info_code info_msg filename tags username password proxied mime_type md5 extracted_request_files extracted_response_files -#types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] string string vector[string] vector[string] -1340127577.361683 FrJExwHcSal 2001:0:4137:9e50:8000:f12a:b9c8:2815 1286 2001:4860:0:2001::68 80 1 GET ipv6.google.com / - Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9b5) Gecko/2008032620 Firefox/3.0b5 0 6640 200 OK - - - (empty) - - - text/html - - - -1340127577.379360 FrJExwHcSal 2001:0:4137:9e50:8000:f12a:b9c8:2815 1286 2001:4860:0:2001::68 80 2 GET ipv6.google.com /search?hl=en&q=Wireshark+!&btnG=Google+Search http://ipv6.google.com/ Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9b5) Gecko/2008032620 Firefox/3.0b5 0 25119 200 OK - - - (empty) - - - text/html - - - -#close 2013-05-21-21-11-22 +#open 2013-07-25-16-22-21 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied orig_fuids orig_mime_types resp_fuids resp_mime_types +#types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] vector[string] vector[string] vector[string] vector[string] +1340127577.361683 FrJExwHcSal 2001:0:4137:9e50:8000:f12a:b9c8:2815 1286 2001:4860:0:2001::68 80 1 GET ipv6.google.com / - Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9b5) Gecko/2008032620 Firefox/3.0b5 0 6640 200 OK - - - (empty) - - - - - RzAMHHXJral text/html +1340127577.379360 FrJExwHcSal 2001:0:4137:9e50:8000:f12a:b9c8:2815 1286 2001:4860:0:2001::68 80 2 GET ipv6.google.com /search?hl=en&q=Wireshark+!&btnG=Google+Search http://ipv6.google.com/ Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9b5) Gecko/2008032620 Firefox/3.0b5 0 25119 200 OK - - - (empty) - - - - - vOmb3ToMKRg text/html +#close 2013-07-25-16-22-21 diff --git a/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log b/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log index b7585a1477..4bcda86272 100644 --- a/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log +++ b/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log @@ -3,7 +3,7 @@ #empty_field (empty) #unset_field - #path loaded_scripts -#open 2013-07-05-05-20-50 +#open 2013-07-25-19-59-47 #fields name #types string scripts/base/init-bare.bro @@ -84,9 +84,11 @@ scripts/base/init-bare.bro scripts/base/frameworks/analyzer/main.bro scripts/base/frameworks/packet-filter/utils.bro build/scripts/base/bif/analyzer.bif.bro - scripts/base/frameworks/file-analysis/__load__.bro - scripts/base/frameworks/file-analysis/main.bro + scripts/base/frameworks/files/__load__.bro + scripts/base/frameworks/files/main.bro build/scripts/base/bif/file_analysis.bif.bro + scripts/base/utils/site.bro + scripts/base/utils/patterns.bro scripts/policy/misc/loaded-scripts.bro scripts/base/utils/paths.bro -#close 2013-07-05-05-20-50 +#close 2013-07-25-19-59-47 diff --git a/testing/btest/Baseline/istate.events-ssl/receiver.http.log b/testing/btest/Baseline/istate.events-ssl/receiver.http.log index be7e6e5692..dd61de5424 100644 --- a/testing/btest/Baseline/istate.events-ssl/receiver.http.log +++ 
b/testing/btest/Baseline/istate.events-ssl/receiver.http.log @@ -3,8 +3,8 @@ #empty_field (empty) #unset_field - #path http -#open 2013-05-21-21-11-32 -#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied mime_type md5 extracted_request_files extracted_response_files -#types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] string string vector[string] vector[string] -1369170691.550143 arKYeMETxOg 141.42.64.125 56730 125.190.109.199 80 1 GET www.icir.org / - Wget/1.10 0 9130 200 OK - - - (empty) - - - - - - - -#close 2013-05-21-21-11-33 +#open 2013-07-25-21-10-36 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied orig_fuids orig_mime_types resp_fuids resp_mime_types +#types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] vector[string] vector[string] vector[string] vector[string] +1374786635.573905 arKYeMETxOg 141.42.64.125 56730 125.190.109.199 80 1 GET www.icir.org / - Wget/1.10 0 9130 200 OK - - - (empty) - - - - - - - +#close 2013-07-25-21-10-37 diff --git a/testing/btest/Baseline/istate.events-ssl/sender.http.log b/testing/btest/Baseline/istate.events-ssl/sender.http.log index be7e6e5692..dd61de5424 100644 --- a/testing/btest/Baseline/istate.events-ssl/sender.http.log +++ b/testing/btest/Baseline/istate.events-ssl/sender.http.log @@ -3,8 +3,8 @@ #empty_field (empty) #unset_field - #path http -#open 2013-05-21-21-11-32 -#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied mime_type md5 extracted_request_files extracted_response_files -#types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] string string vector[string] vector[string] -1369170691.550143 arKYeMETxOg 141.42.64.125 56730 125.190.109.199 80 1 GET www.icir.org / - Wget/1.10 0 9130 200 OK - - - (empty) - - - - - - - -#close 2013-05-21-21-11-33 +#open 2013-07-25-21-10-36 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied orig_fuids orig_mime_types resp_fuids resp_mime_types +#types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] vector[string] vector[string] vector[string] vector[string] +1374786635.573905 arKYeMETxOg 141.42.64.125 56730 125.190.109.199 80 1 GET www.icir.org / - Wget/1.10 0 9130 200 OK - - - (empty) - - - - - - - +#close 2013-07-25-21-10-37 diff --git a/testing/btest/Baseline/istate.events/receiver.http.log b/testing/btest/Baseline/istate.events/receiver.http.log index ae693399c3..aebe4dea7b 100644 --- a/testing/btest/Baseline/istate.events/receiver.http.log +++ b/testing/btest/Baseline/istate.events/receiver.http.log @@ 
-3,8 +3,8 @@ #empty_field (empty) #unset_field - #path http -#open 2013-05-21-21-11-40 -#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied mime_type md5 extracted_request_files extracted_response_files -#types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] string string vector[string] vector[string] -1369170699.511968 arKYeMETxOg 141.42.64.125 56730 125.190.109.199 80 1 GET www.icir.org / - Wget/1.10 0 9130 200 OK - - - (empty) - - - - - - - -#close 2013-05-21-21-11-41 +#open 2013-07-25-20-26-59 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied orig_fuids orig_mime_types resp_fuids resp_mime_types +#types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] vector[string] vector[string] vector[string] vector[string] +1374784018.898860 arKYeMETxOg 141.42.64.125 56730 125.190.109.199 80 1 GET www.icir.org / - Wget/1.10 0 9130 200 OK - - - (empty) - - - - - - - +#close 2013-07-25-20-27-00 diff --git a/testing/btest/Baseline/istate.events/sender.http.log b/testing/btest/Baseline/istate.events/sender.http.log index ae693399c3..b70ba733bd 100644 --- a/testing/btest/Baseline/istate.events/sender.http.log +++ b/testing/btest/Baseline/istate.events/sender.http.log @@ -3,8 +3,8 @@ #empty_field (empty) #unset_field - #path http -#open 2013-05-21-21-11-40 -#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied mime_type md5 extracted_request_files extracted_response_files -#types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] string string vector[string] vector[string] -1369170699.511968 arKYeMETxOg 141.42.64.125 56730 125.190.109.199 80 1 GET www.icir.org / - Wget/1.10 0 9130 200 OK - - - (empty) - - - - - - - -#close 2013-05-21-21-11-41 +#open 2013-07-25-21-05-37 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied orig_fuids orig_mime_types resp_fuids resp_mime_types +#types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] vector[string] vector[string] vector[string] vector[string] +1374786336.338273 arKYeMETxOg 141.42.64.125 56730 125.190.109.199 80 1 GET www.icir.org / - Wget/1.10 0 9130 200 OK - - - (empty) - - - - - - - +#close 2013-07-25-21-05-38 diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.actions.data_event/out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.actions.data_event/out index ddc3449a4c..cbd60840bf 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.actions.data_event/out +++ 
b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.actions.data_event/out @@ -4,6 +4,7 @@ FILE_BOF_BUFFER ^J0.26 | 201 MIME_TYPE text/plain +FILE_OVER_NEW_CONNECTION file_stream, file #0, 1500, ^J0.26 | 2012-08-24 15:10:04 -0700^J^J * Fixing update-changes, which could pick the wrong control file. (Robin Sommer)^J^J * Fixing GPG signing script. (Robin Sommer)^J^J0.25 | 2012-08-01 13:55:46 -0500^J^J * Fix configure script to exit with non-zero status on error (Jon Siwek)^J^J0.24 | 2012-07-05 12:50:43 -0700^J^J * Raise minimum required CMake version to 2.6.3 (Jon Siwek)^J^J * Adding script to delete old fully-merged branches. (Robin Sommer)^J^J0.23-2 | 2012-01-25 13:24:01 -0800^J^J * Fix a bro-cut error message. (Daniel Thayer)^J^J0.23 | 2012-01-11 12:16:11 -0800^J^J * Tweaks to release scripts, plus a new one for signing files.^J (Robin Sommer)^J^J0.22 | 2012-01-10 16:45:19 -0800^J^J * Tweaks for OpenBSD support. (Jon Siwek)^J^J * bro-cut extensions and fixes. (Robin Sommer)^J ^J - If no field names are given on the command line, we now pass through^J all fields. Adresses #657.^J^J - Removing some GNUism from awk script. Addresses #653.^J^J - Added option for time output in UTC. Addresses #668.^J^J - Added output field separator option -F. Addresses #649.^J^J - Fixing option -c: only some header lines were passed through^J rather than all. (Robin Sommer)^J^J * Fix parallel make portability. (Jon Siwek)^J^J0.21-9 | 2011-11-07 05:44:14 -0800^J^J * Fixing compiler warnings. Addresses #388. (Jon Siwek)^J^J0.21-2 | 2011-11-02 18:12:13 -0700^J^J * Fix for misnaming temp file in update-changes script. (Robin Sommer)^J^J0.21-1 | 2011-11-02 18:10:39 -0700^J^J * Little fix for make-relea file_chunk, file #0, 1500, 0, ^J0.26 | 2012-08-24 15:10:04 -0700^J^J * Fixing update-changes, which could pick the wrong control file. (Robin Sommer)^J^J * Fixing GPG signing script. (Robin Sommer)^J^J0.25 | 2012-08-01 13:55:46 -0500^J^J * Fix configure script to exit with non-zero status on error (Jon Siwek)^J^J0.24 | 2012-07-05 12:50:43 -0700^J^J * Raise minimum required CMake version to 2.6.3 (Jon Siwek)^J^J * Adding script to delete old fully-merged branches. (Robin Sommer)^J^J0.23-2 | 2012-01-25 13:24:01 -0800^J^J * Fix a bro-cut error message. (Daniel Thayer)^J^J0.23 | 2012-01-11 12:16:11 -0800^J^J * Tweaks to release scripts, plus a new one for signing files.^J (Robin Sommer)^J^J0.22 | 2012-01-10 16:45:19 -0800^J^J * Tweaks for OpenBSD support. (Jon Siwek)^J^J * bro-cut extensions and fixes. (Robin Sommer)^J ^J - If no field names are given on the command line, we now pass through^J all fields. Adresses #657.^J^J - Removing some GNUism from awk script. Addresses #653.^J^J - Added option for time output in UTC. Addresses #668.^J^J - Added output field separator option -F. Addresses #649.^J^J - Fixing option -c: only some header lines were passed through^J rather than all. (Robin Sommer)^J^J * Fix parallel make portability. (Jon Siwek)^J^J0.21-9 | 2011-11-07 05:44:14 -0800^J^J * Fixing compiler warnings. Addresses #388. (Jon Siwek)^J^J0.21-2 | 2011-11-02 18:12:13 -0700^J^J * Fix for misnaming temp file in update-changes script. (Robin Sommer)^J^J0.21-1 | 2011-11-02 18:10:39 -0700^J^J * Little fix for make-relea file_stream, file #0, 1024, se script, which could pick out the wrong^J tag. (Robin Sommer)^J^J0.21 | 2011-10-27 17:40:45 -0700^J^J * Fixing bro-cut's usage message and argument error handling. (Robin Sommer)^J^J * Bugfix in update-changes script. 
(Robin Sommer)^J^J * update-changes now ignores commits it did itself. (Robin Sommer)^J^J * Fix a bug in the update-changes script. (Robin Sommer)^J^J * bro-cut now always installs to $prefix/bin by `make install`. (Jon Siwek)^J^J * Options to adjust time format for bro-cut. (Robin Sommer)^J^J The default with -d is now ISO format. The new option "-D "^J specifies a custom strftime()-style format string. Alternatively,^J the environment variable BRO_CUT_TIMEFMT can set the format as^J well.^J^J * bro-cut now understands the field separator header. (Robin Sommer)^J^J * Renaming options -h/-H -> -c/-C, and doing some general cleanup.^J^J0.2 | 2011-10-25 19:53:57 -0700^J^J * Adding support for replacing version string in a setup.py. (Robin^J Sommer)^J^J * Change generated root cert DN indices f diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.remove_action/get.out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.remove_action/get.out index 4b572d5df9..eb62690f91 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.remove_action/get.out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.remove_action/get.out @@ -4,6 +4,7 @@ FILE_BOF_BUFFER ^J0.26 | 201 MIME_TYPE text/plain +FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #0, 4705, 0 [orig_h=141.142.228.5, orig_p=59856/tcp, resp_h=192.150.187.43, resp_p=80/tcp] diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.set_timeout_interval/bro..stdout b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.set_timeout_interval/bro..stdout index 160a51a543..e78f5c8c17 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.set_timeout_interval/bro..stdout +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.set_timeout_interval/bro..stdout @@ -2,6 +2,7 @@ FILE_NEW file #0, 0, 0 MIME_TYPE application/x-dosexec +FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #0, 1022920, 0 [orig_h=192.168.72.14, orig_p=3254/tcp, resp_h=65.54.95.206, resp_p=80/tcp] @@ -11,6 +12,7 @@ FILE_NEW file #1, 0, 0 MIME_TYPE application/octet-stream +FILE_OVER_NEW_CONNECTION FILE_TIMEOUT FILE_TIMEOUT FILE_STATE_REMOVE diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.stop/get.out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.stop/get.out index f7182027aa..13cfe5de58 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.stop/get.out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.stop/get.out @@ -4,3 +4,4 @@ FILE_BOF_BUFFER ^J0.26 | 201 MIME_TYPE text/plain +FILE_OVER_NEW_CONNECTION diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.ftp/out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.ftp/out index c810ce15e5..eba43b94a4 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.ftp/out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.ftp/out @@ -3,7 +3,7 @@ file #0, 0, 0 FILE_BOF_BUFFER The Nationa MIME_TYPE -application/octet-stream +text/x-pascal FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #0, 16557, 0 diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.get/get-gzip.out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.get/get-gzip.out index 2b46d02042..d42db4b90a 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.get/get-gzip.out +++ 
b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.get/get-gzip.out @@ -4,6 +4,7 @@ FILE_BOF_BUFFER {^J "origin MIME_TYPE text/plain +FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #0, 197, 0 [orig_h=141.142.228.5, orig_p=50153/tcp, resp_h=54.243.118.187, resp_p=80/tcp] diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.get/get.out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.get/get.out index bb2f622969..219aad4eff 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.get/get.out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.get/get.out @@ -4,6 +4,7 @@ FILE_BOF_BUFFER ^J0.26 | 201 MIME_TYPE text/plain +FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #0, 4705, 0 [orig_h=141.142.228.5, orig_p=59856/tcp, resp_h=192.150.187.43, resp_p=80/tcp] diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.multipart/out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.multipart/out index 4b6fa76c0c..da42f4fd68 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.multipart/out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.multipart/out @@ -4,6 +4,7 @@ FILE_BOF_BUFFER test^M^J MIME_TYPE text/plain +FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #0, 6, 0 [orig_h=141.142.228.5, orig_p=57262/tcp, resp_h=54.243.88.146, resp_p=80/tcp] @@ -17,6 +18,7 @@ FILE_BOF_BUFFER test2^M^J MIME_TYPE text/plain +FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #1, 7, 0 [orig_h=141.142.228.5, orig_p=57262/tcp, resp_h=54.243.88.146, resp_p=80/tcp] @@ -30,6 +32,7 @@ FILE_BOF_BUFFER test3^M^J MIME_TYPE text/plain +FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #2, 7, 0 [orig_h=141.142.228.5, orig_p=57262/tcp, resp_h=54.243.88.146, resp_p=80/tcp] @@ -43,6 +46,7 @@ FILE_BOF_BUFFER {^J "data": MIME_TYPE text/plain +FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #3, 465, 0 [orig_h=141.142.228.5, orig_p=57262/tcp, resp_h=54.243.88.146, resp_p=80/tcp] diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/a.out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/a.out index f8f2538e92..077fb5282c 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/a.out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/a.out @@ -3,6 +3,7 @@ file #0, 0, 0 MIME_TYPE application/pdf FILE_OVER_NEW_CONNECTION +FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #0, 555523, 0 [orig_h=10.101.84.70, orig_p=10978/tcp, resp_h=129.174.93.161, resp_p=80/tcp] diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/b.out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/b.out index b2a0cb66a2..9c05f311f3 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/b.out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/b.out @@ -2,6 +2,7 @@ FILE_NEW file #0, 0, 0 MIME_TYPE application/x-dosexec +FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #0, 1022920, 0 [orig_h=192.168.72.14, orig_p=3254/tcp, resp_h=65.54.95.206, resp_p=80/tcp] @@ -11,6 +12,7 @@ FILE_NEW file #1, 0, 0 MIME_TYPE application/octet-stream +FILE_OVER_NEW_CONNECTION FILE_TIMEOUT FILE_STATE_REMOVE file #1, 206024, 0 diff --git 
a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/c.out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/c.out index 7c5e9dfeca..d85a9de314 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/c.out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/c.out @@ -3,6 +3,7 @@ file #0, 0, 0 MIME_TYPE application/octet-stream FILE_OVER_NEW_CONNECTION +FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #0, 498702, 0 [orig_h=10.45.179.94, orig_p=19950/tcp, resp_h=129.174.93.170, resp_p=80/tcp] diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.pipeline/out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.pipeline/out index 02ac2f0a7e..b85485cd1a 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.pipeline/out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.pipeline/out @@ -4,6 +4,7 @@ FILE_BOF_BUFFER /*^J******** MIME_TYPE text/plain +FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #0, 2675, 0 [orig_h=192.168.1.104, orig_p=1673/tcp, resp_h=63.245.209.11, resp_p=80/tcp] @@ -17,6 +18,7 @@ FILE_BOF_BUFFER //-- Google MIME_TYPE text/plain +FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #1, 21421, 0 [orig_h=192.168.1.104, orig_p=1673/tcp, resp_h=63.245.209.11, resp_p=80/tcp] @@ -30,6 +32,7 @@ FILE_BOF_BUFFER GIF89a^D\0^D\0\xb3 MIME_TYPE image/gif +FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #2, 94, 0 [orig_h=192.168.1.104, orig_p=1673/tcp, resp_h=63.245.209.11, resp_p=80/tcp] @@ -44,6 +47,7 @@ FILE_BOF_BUFFER \x89PNG^M^J^Z^J\0\0\0 MIME_TYPE image/png +FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #3, 2349, 0 [orig_h=192.168.1.104, orig_p=1673/tcp, resp_h=63.245.209.11, resp_p=80/tcp] @@ -58,6 +62,7 @@ FILE_BOF_BUFFER \x89PNG^M^J^Z^J\0\0\0 MIME_TYPE image/png +FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #4, 27579, 0 [orig_h=192.168.1.104, orig_p=1673/tcp, resp_h=63.245.209.11, resp_p=80/tcp] diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.post/out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.post/out index 3103ecb39e..cedc396254 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.post/out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.post/out @@ -4,6 +4,7 @@ FILE_BOF_BUFFER hello world MIME_TYPE text/plain +FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #0, 11, 0 [orig_h=141.142.228.5, orig_p=53595/tcp, resp_h=54.243.55.129, resp_p=80/tcp] @@ -18,6 +19,7 @@ FILE_BOF_BUFFER {^J "origin MIME_TYPE text/plain +FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #1, 366, 0 [orig_h=141.142.228.5, orig_p=53595/tcp, resp_h=54.243.55.129, resp_p=80/tcp] diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.logging/file_analysis.log b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.logging/file_analysis.log deleted file mode 100644 index f95a70d50a..0000000000 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.logging/file_analysis.log +++ /dev/null @@ -1,10 +0,0 @@ -#separator \x09 -#set_separator , -#empty_field (empty) -#unset_field - -#path file_analysis -#open 2013-06-07-18-51-45 -#fields id parent_id source is_orig last_active seen_bytes total_bytes missing_bytes overflow_bytes timeout_interval bof_buffer_size mime_type timedout conn_uids extracted_files md5 sha1 sha256 -#types string string string bool time 
count count count count interval count string bool table[string] table[string] string string string -BYYd1GSNX5c - HTTP F 1362692527.009775 4705 4705 0 0 120.000000 1024 text/plain F UWkUyAuUGXf BYYd1GSNX5c-file 397168fd09991a0e712254df7bc639ac 1dd7ac0398df6cbc0696445a91ec681facf4dc47 4e7c7ef0984119447e743e3ec77e1de52713e345cde03fe7df753a35849bed18 -#close 2013-06-07-18-51-46 diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.logging/files.log b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.logging/files.log new file mode 100644 index 0000000000..2663184b88 --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.logging/files.log @@ -0,0 +1,10 @@ +#separator \x09 +#set_separator , +#empty_field (empty) +#unset_field - +#path files +#open 2013-07-25-16-57-31 +#fields ts fuid tx_hosts rx_hosts conn_uids source depth analyzers mime_type filename duration local_orig is_orig seen_bytes total_bytes missing_bytes overflow_bytes timedout parent_fuid md5 sha1 sha256 extracted +#types time string table[addr] table[addr] table[string] string count table[string] string string interval bool bool count count count count bool string string string string string +1362692527.009721 G75mcAsU764 192.150.187.43 141.142.228.5 UWkUyAuUGXf HTTP 0 SHA256,DATA_EVENT,MD5,EXTRACT,SHA1 text/plain - 0.000054 - F 4705 4705 0 0 F - 397168fd09991a0e712254df7bc639ac 1dd7ac0398df6cbc0696445a91ec681facf4dc47 4e7c7ef0984119447e743e3ec77e1de52713e345cde03fe7df753a35849bed18 G75mcAsU764-file +#close 2013-07-25-16-57-31 diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.smtp/out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.smtp/out index ac4e6e50fa..57f1f97b9c 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.smtp/out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.smtp/out @@ -4,6 +4,7 @@ FILE_BOF_BUFFER Hello^M^J^M^J ^M MIME_TYPE text/plain +FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE file #0, 79, 0 [orig_h=10.10.1.4, orig_p=1470/tcp, resp_h=74.53.140.153, resp_p=25/tcp] @@ -17,6 +18,7 @@ FILE_BOF_BUFFER pub/NetBSD/README.export-control -lrwxrwxr-x 1 root wheel 32 Aug 16 2009 .message -> pub/NetBSD/README.export-control -total 98028 -total 98028 diff --git a/testing/btest/Baseline/scripts.base.protocols.ftp.ftp-extract/ftp.log b/testing/btest/Baseline/scripts.base.protocols.ftp.ftp-extract/ftp.log deleted file mode 100644 index e77f59dc44..0000000000 --- a/testing/btest/Baseline/scripts.base.protocols.ftp.ftp-extract/ftp.log +++ /dev/null @@ -1,21 +0,0 @@ -#separator \x09 -#set_separator , -#empty_field (empty) -#unset_field - -#path ftp -#open 2013-06-07-18-57-22 -#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p user password command arg mime_type file_size reply_code reply_msg tags data_channel.passive data_channel.orig_h data_channel.resp_h data_channel.resp_p extraction_file -#types time string addr port addr port string string string string string count count string table[string] bool addr addr port string -1329843175.680248 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test PASV - - - 227 Entering Passive Mode (199,233,217,249,221,90) (empty) T 141.142.220.235 199.233.217.249 56666 - -1329843175.791528 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test LIST - - - 226 Transfer complete. 
(empty) - - - - - -1329843179.815947 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test PASV - - - 227 Entering Passive Mode (199,233,217,249,221,91) (empty) T 141.142.220.235 199.233.217.249 56667 - -1329843193.984222 arKYeMETxOg 141.142.220.235 37604 199.233.217.249 56666 - - - - - - - (empty) - - - - ftp-item-pVhQhhFsB2b.dat -1329843193.984222 k6kgXLOoSKl 141.142.220.235 59378 199.233.217.249 56667 - - - - - - - (empty) - - - - ftp-item-fFCPkV1sEsc.dat -1329843179.926563 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test RETR ftp://199.233.217.249/./robots.txt text/plain 77 226 Transfer complete. (empty) - - - - - -1329843194.040188 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test PORT 141,142,220,235,131,46 - - 200 PORT command successful. (empty) F 199.233.217.249 141.142.220.235 33582 - -1329843194.095782 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test LIST - - - 226 Transfer complete. (empty) - - - - - -1329843197.672179 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test PORT 141,142,220,235,147,203 - - 200 PORT command successful. (empty) F 199.233.217.249 141.142.220.235 37835 - -1329843199.968212 nQcgTWjvg4c 199.233.217.249 61920 141.142.220.235 33582 - - - - - - - (empty) - - - - ftp-item-g3zS3MuJFh.dat -1329843197.727769 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test RETR ftp://199.233.217.249/./robots.txt text/plain 77 226 Transfer complete. (empty) - - - - - -1329843200.079930 j4u32Pc5bif 199.233.217.249 61918 141.142.220.235 37835 - - - - - - - (empty) - - - - ftp-item-lMf4UWRkEO5.dat -#close 2013-06-07-18-57-22 diff --git a/testing/btest/Baseline/scripts.base.protocols.http.100-continue/http.log b/testing/btest/Baseline/scripts.base.protocols.http.100-continue/http.log index edbee28991..a81c0d4a2d 100644 --- a/testing/btest/Baseline/scripts.base.protocols.http.100-continue/http.log +++ b/testing/btest/Baseline/scripts.base.protocols.http.100-continue/http.log @@ -3,8 +3,8 @@ #empty_field (empty) #unset_field - #path http -#open 2013-05-21-21-11-24 -#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied mime_type md5 extracted_request_files extracted_response_files -#types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] string string vector[string] vector[string] -1237440095.634312 UWkUyAuUGXf 192.168.3.103 54102 128.146.216.51 80 1 POST www.osu.edu / - curl/7.17.1 (i386-apple-darwin8.11.1) libcurl/7.17.1 zlib/1.2.3 2001 60731 200 OK 100 Continue - (empty) - - - text/html - - - -#close 2013-05-21-21-11-24 +#open 2013-07-25-19-39-08 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied orig_fuids orig_mime_types resp_fuids resp_mime_types +#types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] vector[string] vector[string] vector[string] vector[string] +1237440095.634312 UWkUyAuUGXf 192.168.3.103 54102 128.146.216.51 80 1 POST www.osu.edu / - curl/7.17.1 (i386-apple-darwin8.11.1) libcurl/7.17.1 zlib/1.2.3 2001 60731 200 OK 
100 Continue - (empty) - - - 8TXBHVmBGD7 text/plain ATGo7hdUXdi text/html +#close 2013-07-25-19-39-08 diff --git a/testing/btest/Baseline/scripts.base.protocols.http.http-extract-files/http-item.dat b/testing/btest/Baseline/scripts.base.protocols.http.http-extract-files/http-item.dat deleted file mode 100644 index 73c369dd14..0000000000 --- a/testing/btest/Baseline/scripts.base.protocols.http.http-extract-files/http-item.dat +++ /dev/null @@ -1,304 +0,0 @@ - -ICIR - -ICIR
    -

    -ICIR (The ICSI Center for Internet Research) -is a -non-profit -research institute at -ICSI -in -Berkeley, -California.
    -For the three years from 1999 to 2001 we were named -ACIRI, the AT&T Center for Internet Research at ICSI, -and were funded by AT&T.
    - -The goals of ICIR are to: -

      -
    • Pursue research on the Internet architecture and related networking issues, -
    • -Participate actively in the research (SIGCOMM and IRTF) and -standards (IETF) communities, -
    • Bridge the gap between the Internet research community and commercial -interests by providing a neutral forum where topics of mutual technical -interest can be addressed. -
    -

    - -


    - -
    - - - - - - - - - - -
    - -

    -People -

    - - -
    - -

    -Publications -

    - - -

    -Projects -

    - - - -
    - -

    Research

    -   Transport and Congestion - - -   Traffic and Topology -
      -
    • -IDMaps -(Internet Distance Mapping). -
    • The -Internet Traffic Archive. -
    • -MINC -(Multicast-based Inference of Network-internal Characteristics). -
    • -NIMI -(National Internet Measurement Infrastructure). -
    - -

    - -Collaborators -

    - - - -
    -
    - -
    -

    Information for visitors and local users.

    -
    -Last modified: June 2004. Copyright notice. - -Older versions of this web page, in its ACIRI incarnation.. -
    -For more information about this server, mail www@aciri.org. -
    -To report unusual activity by any of our hosts, mail abuse@aciri.org. - diff --git a/testing/btest/Baseline/scripts.base.protocols.http.http-extract-files/http.log b/testing/btest/Baseline/scripts.base.protocols.http.http-extract-files/http.log deleted file mode 100644 index 53b80e5e9e..0000000000 --- a/testing/btest/Baseline/scripts.base.protocols.http.http-extract-files/http.log +++ /dev/null @@ -1,10 +0,0 @@ -#separator \x09 -#set_separator , -#empty_field (empty) -#unset_field - -#path http -#open 2013-06-07-19-04-27 -#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied mime_type md5 extracted_request_files extracted_response_files -#types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] string string vector[string] vector[string] -1128727435.634189 arKYeMETxOg 141.42.64.125 56730 125.190.109.199 80 1 GET www.icir.org / - Wget/1.10 0 9130 200 OK - - - (empty) - - - text/html - - http-item-54zlJFqn0x6.dat -#close 2013-06-07-19-04-27 diff --git a/testing/btest/Baseline/scripts.base.protocols.http.http-methods/http.log b/testing/btest/Baseline/scripts.base.protocols.http.http-methods/http.log index 54a75f4697..674e355631 100644 --- a/testing/btest/Baseline/scripts.base.protocols.http.http-methods/http.log +++ b/testing/btest/Baseline/scripts.base.protocols.http.http-methods/http.log @@ -3,56 +3,56 @@ #empty_field (empty) #unset_field - #path http -#open 2013-05-21-21-11-25 -#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied mime_type md5 extracted_request_files extracted_response_files -#types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] string string vector[string] vector[string] -1354328870.191989 UWkUyAuUGXf 128.2.6.136 46562 173.194.75.103 80 1 OPTIONS www.google.com * - - 0 962 405 Method Not Allowed - - - (empty) - - - text/html - - - -1354328874.237327 arKYeMETxOg 128.2.6.136 46563 173.194.75.103 80 1 OPTIONS www.google.com HTTP/1.1 - - 0 925 400 Bad Request - - - (empty) - - - text/html - - - -1354328874.299063 k6kgXLOoSKl 128.2.6.136 46564 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - - -1354328874.342591 nQcgTWjvg4c 128.2.6.136 46565 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - - -1354328874.364020 j4u32Pc5bif 128.2.6.136 46566 173.194.75.103 80 1 GET www.google.com / - - 0 43911 200 OK - - - (empty) - - - text/html - - - -1354328878.470424 TEfuqmmG4bh 128.2.6.136 46567 173.194.75.103 80 1 GET www.google.com / - - 0 43983 200 OK - - - (empty) - - - text/html - - - -1354328882.575456 FrJExwHcSal 128.2.6.136 46568 173.194.75.103 80 1 GET www.google.com /HTTP/1.1 - - 0 1207 403 Forbidden - - - (empty) - - - text/html - - - -1354328882.928027 5OKnoww6xl4 128.2.6.136 46569 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - - -1354328882.968948 3PKsZ2Uye21 128.2.6.136 46570 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - - -1354328882.990373 VW0XPVINV8a 128.2.6.136 46571 
173.194.75.103 80 1 GET www.google.com / - - 0 43913 200 OK - - - (empty) - - - text/html - - - -1354328887.114613 fRFu0wcOle6 128.2.6.136 46572 173.194.75.103 80 0 - - - - - 0 961 405 Method Not Allowed - - - (empty) - - - text/html - - - -1354328891.161077 qSsw6ESzHV4 128.2.6.136 46573 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - - -1354328891.204740 iE6yhOq3SF 128.2.6.136 46574 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - - -1354328891.245592 GSxOnSLghOa 128.2.6.136 46575 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - - -1354328891.287655 qCaWGmzFtM5 128.2.6.136 46576 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - - -1354328891.309065 70MGiRM1Qf4 128.2.6.136 46577 173.194.75.103 80 1 CCM_POST www.google.com / - - 0 963 405 Method Not Allowed - - - (empty) - - - text/html - - - -1354328895.355012 h5DsfNtYzi1 128.2.6.136 46578 173.194.75.103 80 1 CCM_POST www.google.com /HTTP/1.1 - - 0 925 400 Bad Request - - - (empty) - - - text/html - - - -1354328895.416133 P654jzLoe3a 128.2.6.136 46579 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - - -1354328895.459490 Tw8jXtpTGu6 128.2.6.136 46580 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - - -1354328895.480865 c4Zw9TmAE05 128.2.6.136 46581 173.194.75.103 80 1 CCM_POST www.google.com / - - 0 963 405 Method Not Allowed - - - (empty) - - - text/html - - - -1354328899.526682 EAr0uf4mhq 128.2.6.136 46582 173.194.75.103 80 1 CONNECT www.google.com / - - 0 925 400 Bad Request - - - (empty) - - - text/html - - - -1354328903.572533 GvmoxJFXdTa 128.2.6.136 46583 173.194.75.103 80 1 CONNECT www.google.com /HTTP/1.1 - - 0 925 400 Bad Request - - - (empty) - - - text/html - - - -1354328903.634196 0Q4FH8sESw5 128.2.6.136 46584 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - - -1354328903.676395 slFea8xwSmb 128.2.6.136 46585 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - - -1354328903.697693 UfGkYA2HI2g 128.2.6.136 46586 173.194.75.103 80 1 CONNECT www.google.com / - - 0 925 400 Bad Request - - - (empty) - - - text/html - - - -1354328907.743696 i2rO3KD1Syg 128.2.6.136 46587 173.194.75.103 80 1 TRACE www.google.com / - - 0 960 405 Method Not Allowed - - - (empty) - - - text/html - - - -1354328911.790590 2cx26uAvUPl 128.2.6.136 46588 173.194.75.103 80 1 TRACE www.google.com /HTTP/1.1 - - 0 925 400 Bad Request - - - (empty) - - - text/html - - - -1354328911.853464 BWaU4aSuwkc 128.2.6.136 46589 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - - -1354328911.897044 10XodEwRycf 128.2.6.136 46590 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - - -1354328911.918511 zno26fFZkrh 128.2.6.136 46591 173.194.75.103 80 1 TRACE www.google.com / - - 0 960 405 Method Not Allowed - - - (empty) - - - text/html - - - -1354328915.964678 v5rgkJBig5l 128.2.6.136 46592 173.194.75.103 80 1 DELETE www.google.com / - - 0 961 405 Method Not Allowed - - - (empty) - - - text/html - - - -1354328920.010458 eWZCH7OONC1 128.2.6.136 46593 173.194.75.103 80 1 DELETE www.google.com /HTTP/1.1 - - 0 925 400 Bad Request - - - (empty) - - - text/html - - - -1354328920.072101 0Pwk3ntf8O3 128.2.6.136 46594 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - - -1354328920.114526 0HKorjr8Zp7 
128.2.6.136 46595 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - - -1354328920.136714 yC2d6kVg709 128.2.6.136 46596 173.194.75.103 80 1 DELETE www.google.com / - - 0 961 405 Method Not Allowed - - - (empty) - - - text/html - - - -1354328924.183211 VcgagLjnO92 128.2.6.136 46597 173.194.75.103 80 1 PUT www.google.com / - - 0 934 411 Length Required - - - (empty) - - - text/html - - - -1354328924.224567 bdRoHfaPBo3 128.2.6.136 46598 173.194.75.103 80 1 PUT www.google.com /HTTP/1.1 - - 0 934 411 Length Required - - - (empty) - - - text/html - - - -1354328924.287402 zHqb7t7kv28 128.2.6.136 46599 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - - -1354328924.328257 rrZWoMUQpv8 128.2.6.136 46600 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - - -1354328924.350343 xNYSS2hJkle 128.2.6.136 46601 173.194.75.103 80 1 PUT www.google.com / - - 0 934 411 Length Required - - - (empty) - - - text/html - - - -1354328924.391728 vMVjlplKKbd 128.2.6.136 46602 173.194.75.103 80 1 POST www.google.com / - - 0 934 411 Length Required - - - (empty) - - - text/html - - - -1354328924.433150 3omNawSNrxj 128.2.6.136 46603 173.194.75.103 80 1 POST www.google.com /HTTP/1.1 - - 0 934 411 Length Required - - - (empty) - - - text/html - - - -1354328924.496732 Rv8AJVfi9Zi 128.2.6.136 46604 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - - -1354328924.537671 wEyF3OvvcQe 128.2.6.136 46605 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - - +#open 2013-07-25-19-41-27 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied orig_fuids orig_mime_types resp_fuids resp_mime_types +#types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] vector[string] vector[string] vector[string] vector[string] +1354328870.191989 UWkUyAuUGXf 128.2.6.136 46562 173.194.75.103 80 1 OPTIONS www.google.com * - - 0 962 405 Method Not Allowed - - - (empty) - - - - - VTrFjxi3V27 text/html +1354328874.237327 arKYeMETxOg 128.2.6.136 46563 173.194.75.103 80 1 OPTIONS www.google.com HTTP/1.1 - - 0 925 400 Bad Request - - - (empty) - - - - - jeoiUX9q8v9 text/html +1354328874.299063 k6kgXLOoSKl 128.2.6.136 46564 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - - - 6dL7NPgFhil text/html +1354328874.342591 nQcgTWjvg4c 128.2.6.136 46565 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - - - cix6gzDRCob text/html +1354328874.364020 j4u32Pc5bif 128.2.6.136 46566 173.194.75.103 80 1 GET www.google.com / - - 0 43911 200 OK - - - (empty) - - - - - tCZHDKUkBdi text/html +1354328878.470424 TEfuqmmG4bh 128.2.6.136 46567 173.194.75.103 80 1 GET www.google.com / - - 0 43983 200 OK - - - (empty) - - - - - iVzFNTeQnnc text/html +1354328882.575456 FrJExwHcSal 128.2.6.136 46568 173.194.75.103 80 1 GET www.google.com /HTTP/1.1 - - 0 1207 403 Forbidden - - - (empty) - - - - - boBAqw2JcFi text/html +1354328882.928027 5OKnoww6xl4 128.2.6.136 46569 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - - - r3w183FJvW3 text/html +1354328882.968948 3PKsZ2Uye21 128.2.6.136 46570 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - - - bncugeoItlf text/html 
+1354328882.990373 VW0XPVINV8a 128.2.6.136 46571 173.194.75.103 80 1 GET www.google.com / - - 0 43913 200 OK - - - (empty) - - - - - NkYD5vo8Gy text/html +1354328887.114613 fRFu0wcOle6 128.2.6.136 46572 173.194.75.103 80 0 - - - - - 0 961 405 Method Not Allowed - - - (empty) - - - - - S85THffBTLh text/html +1354328891.161077 qSsw6ESzHV4 128.2.6.136 46573 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - - - 2m6kUZZS0wd text/html +1354328891.204740 iE6yhOq3SF 128.2.6.136 46574 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - - - UoqtpOgJZSk text/html +1354328891.245592 GSxOnSLghOa 128.2.6.136 46575 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - - - mqs8p4wwsS7 text/html +1354328891.287655 qCaWGmzFtM5 128.2.6.136 46576 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - - - S36eCQJUY5k text/html +1354328891.309065 70MGiRM1Qf4 128.2.6.136 46577 173.194.75.103 80 1 CCM_POST www.google.com / - - 0 963 405 Method Not Allowed - - - (empty) - - - - - LeNRDWYrpS7 text/html +1354328895.355012 h5DsfNtYzi1 128.2.6.136 46578 173.194.75.103 80 1 CCM_POST www.google.com /HTTP/1.1 - - 0 925 400 Bad Request - - - (empty) - - - - - ZwKUASlWzYk text/html +1354328895.416133 P654jzLoe3a 128.2.6.136 46579 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - - - uj62KNQhsG3 text/html +1354328895.459490 Tw8jXtpTGu6 128.2.6.136 46580 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - - - taBxWzrYquk text/html +1354328895.480865 c4Zw9TmAE05 128.2.6.136 46581 173.194.75.103 80 1 CCM_POST www.google.com / - - 0 963 405 Method Not Allowed - - - (empty) - - - - - bHBxZULKI0k text/html +1354328899.526682 EAr0uf4mhq 128.2.6.136 46582 173.194.75.103 80 1 CONNECT www.google.com / - - 0 925 400 Bad Request - - - (empty) - - - - - t6k8zHaGZk5 text/html +1354328903.572533 GvmoxJFXdTa 128.2.6.136 46583 173.194.75.103 80 1 CONNECT www.google.com /HTTP/1.1 - - 0 925 400 Bad Request - - - (empty) - - - - - c11un7ZO6nc text/html +1354328903.634196 0Q4FH8sESw5 128.2.6.136 46584 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - - - iWCHzW5XJWk text/html +1354328903.676395 slFea8xwSmb 128.2.6.136 46585 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - - - dzvHktkjD9a text/html +1354328903.697693 UfGkYA2HI2g 128.2.6.136 46586 173.194.75.103 80 1 CONNECT www.google.com / - - 0 925 400 Bad Request - - - (empty) - - - - - vEO9iYqh3Zc text/html +1354328907.743696 i2rO3KD1Syg 128.2.6.136 46587 173.194.75.103 80 1 TRACE www.google.com / - - 0 960 405 Method Not Allowed - - - (empty) - - - - - 8seYaeRVuV2 text/html +1354328911.790590 2cx26uAvUPl 128.2.6.136 46588 173.194.75.103 80 1 TRACE www.google.com /HTTP/1.1 - - 0 925 400 Bad Request - - - (empty) - - - - - 0kkHkmLHFl3 text/html +1354328911.853464 BWaU4aSuwkc 128.2.6.136 46589 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - - - koHEYsvMVBa text/html +1354328911.897044 10XodEwRycf 128.2.6.136 46590 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - - - 50tlwxQjBCb text/html +1354328911.918511 zno26fFZkrh 128.2.6.136 46591 173.194.75.103 80 1 TRACE www.google.com / - - 0 960 405 Method Not Allowed - - - (empty) - - - - - DdECXqOZjXh text/html +1354328915.964678 v5rgkJBig5l 128.2.6.136 46592 173.194.75.103 80 1 DELETE www.google.com / - - 0 961 405 Method Not Allowed - - - (empty) - - - - - LIZQeBP0Coi text/html +1354328920.010458 eWZCH7OONC1 128.2.6.136 46593 173.194.75.103 
80 1 DELETE www.google.com /HTTP/1.1 - - 0 925 400 Bad Request - - - (empty) - - - - - hjPo0BdP973 text/html +1354328920.072101 0Pwk3ntf8O3 128.2.6.136 46594 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - - - d6K2onvteNa text/html +1354328920.114526 0HKorjr8Zp7 128.2.6.136 46595 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - - - BY1g634OMv6 text/html +1354328920.136714 yC2d6kVg709 128.2.6.136 46596 173.194.75.103 80 1 DELETE www.google.com / - - 0 961 405 Method Not Allowed - - - (empty) - - - - - 5aAa2m40fZd text/html +1354328924.183211 VcgagLjnO92 128.2.6.136 46597 173.194.75.103 80 1 PUT www.google.com / - - 0 934 411 Length Required - - - (empty) - - - - - y3Syn85ve8e text/html +1354328924.224567 bdRoHfaPBo3 128.2.6.136 46598 173.194.75.103 80 1 PUT www.google.com /HTTP/1.1 - - 0 934 411 Length Required - - - (empty) - - - - - P92nMD5z6D4 text/html +1354328924.287402 zHqb7t7kv28 128.2.6.136 46599 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - - - qIPObDBIhSj text/html +1354328924.328257 rrZWoMUQpv8 128.2.6.136 46600 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - - - su86MWxyjne text/html +1354328924.350343 xNYSS2hJkle 128.2.6.136 46601 173.194.75.103 80 1 PUT www.google.com / - - 0 934 411 Length Required - - - (empty) - - - - - r2aysGE6ve8 text/html +1354328924.391728 vMVjlplKKbd 128.2.6.136 46602 173.194.75.103 80 1 POST www.google.com / - - 0 934 411 Length Required - - - (empty) - - - - - Zosv3c0p2Zb text/html +1354328924.433150 3omNawSNrxj 128.2.6.136 46603 173.194.75.103 80 1 POST www.google.com /HTTP/1.1 - - 0 934 411 Length Required - - - (empty) - - - - - L02QmCl2lX4 text/html +1354328924.496732 Rv8AJVfi9Zi 128.2.6.136 46604 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - - - uh9TwTMdWI9 text/html +1354328924.537671 wEyF3OvvcQe 128.2.6.136 46605 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - - - 4gLQ9WVkuYd text/html 1354328924.559704 E490YZTUozc 128.2.6.136 46606 173.194.75.103 80 1 HEAD www.google.com / - - 0 0 200 OK - - - (empty) - - - - - - - 1354328928.625437 YIeWJmXWNWj 128.2.6.136 46607 173.194.75.103 80 1 HEAD www.google.com / - - 0 0 200 OK - - - (empty) - - - - - - - 1354328932.692706 ydiZblvsYri 128.2.6.136 46608 173.194.75.103 80 1 HEAD www.google.com /HTTP/1.1 - - 0 0 400 Bad Request - - - (empty) - - - - - - - -1354328932.754657 HFYOnBqSE5e 128.2.6.136 46609 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - - -1354328932.796568 JcUvhfWUMgd 128.2.6.136 46610 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - - -#close 2013-05-21-21-11-25 +1354328932.754657 HFYOnBqSE5e 128.2.6.136 46609 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - - - NIV5LGdqSk2 text/html +1354328932.796568 JcUvhfWUMgd 128.2.6.136 46610 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - - - SlC7NZIgx1d text/html +#close 2013-07-25-19-41-27 diff --git a/testing/btest/Baseline/scripts.base.protocols.http.http-mime-and-md5/http.log b/testing/btest/Baseline/scripts.base.protocols.http.http-mime-and-md5/http.log deleted file mode 100644 index 97e797b4fb..0000000000 --- a/testing/btest/Baseline/scripts.base.protocols.http.http-mime-and-md5/http.log +++ /dev/null @@ -1,14 +0,0 @@ -#separator \x09 -#set_separator , -#empty_field (empty) -#unset_field - -#path http -#open 2013-05-21-21-11-25 -#fields ts uid id.orig_h id.orig_p id.resp_h 
id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied mime_type md5 extracted_request_files extracted_response_files -#types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] string string vector[string] vector[string] -1258577884.844956 UWkUyAuUGXf 192.168.1.104 1673 63.245.209.11 80 1 GET www.mozilla.org /style/enhanced.css http://www.mozilla.org/projects/calendar/ Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1.5) Gecko/20091102 Firefox/3.5.5 0 2675 200 OK - - - (empty) - - - text/plain - - - -1258577884.960135 UWkUyAuUGXf 192.168.1.104 1673 63.245.209.11 80 2 GET www.mozilla.org /script/urchin.js http://www.mozilla.org/projects/calendar/ Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1.5) Gecko/20091102 Firefox/3.5.5 0 21421 200 OK - - - (empty) - - - text/plain - - - -1258577885.317160 UWkUyAuUGXf 192.168.1.104 1673 63.245.209.11 80 3 GET www.mozilla.org /images/template/screen/bullet_utility.png http://www.mozilla.org/style/screen.css Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1.5) Gecko/20091102 Firefox/3.5.5 0 94 200 OK - - - (empty) - - - image/gif - - - -1258577885.349639 UWkUyAuUGXf 192.168.1.104 1673 63.245.209.11 80 4 GET www.mozilla.org /images/template/screen/key-point-top.png http://www.mozilla.org/style/screen.css Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1.5) Gecko/20091102 Firefox/3.5.5 0 2349 200 OK - - - (empty) - - - image/png e0029eea80812e9a8e57b8d05d52938a - - -1258577885.394612 UWkUyAuUGXf 192.168.1.104 1673 63.245.209.11 80 5 GET www.mozilla.org /projects/calendar/images/header-sunbird.png http://www.mozilla.org/projects/calendar/calendar.css Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1.5) Gecko/20091102 Firefox/3.5.5 0 27579 200 OK - - - (empty) - - - image/png 30aa926344f58019d047e85ba049ca1e - - -#close 2013-05-21-21-11-25 diff --git a/testing/btest/Baseline/scripts.base.protocols.http.http-pipelining/http.log b/testing/btest/Baseline/scripts.base.protocols.http.http-pipelining/http.log index e22fb53103..6779485f91 100644 --- a/testing/btest/Baseline/scripts.base.protocols.http.http-pipelining/http.log +++ b/testing/btest/Baseline/scripts.base.protocols.http.http-pipelining/http.log @@ -3,12 +3,12 @@ #empty_field (empty) #unset_field - #path http -#open 2013-05-21-21-11-25 -#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied md5 extracted_request_files extracted_response_files -#types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] string vector[string] vector[string] -1258577884.844956 UWkUyAuUGXf 192.168.1.104 1673 63.245.209.11 80 1 GET www.mozilla.org /style/enhanced.css http://www.mozilla.org/projects/calendar/ Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1.5) Gecko/20091102 Firefox/3.5.5 0 2675 200 OK - - - (empty) - - - - - - -1258577884.960135 UWkUyAuUGXf 192.168.1.104 1673 63.245.209.11 80 2 GET www.mozilla.org /script/urchin.js http://www.mozilla.org/projects/calendar/ Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1.5) Gecko/20091102 Firefox/3.5.5 0 21421 200 OK - - - (empty) - - - 
- - - -1258577885.317160 UWkUyAuUGXf 192.168.1.104 1673 63.245.209.11 80 3 GET www.mozilla.org /images/template/screen/bullet_utility.png http://www.mozilla.org/style/screen.css Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1.5) Gecko/20091102 Firefox/3.5.5 0 94 200 OK - - - (empty) - - - - - - -1258577885.349639 UWkUyAuUGXf 192.168.1.104 1673 63.245.209.11 80 4 GET www.mozilla.org /images/template/screen/key-point-top.png http://www.mozilla.org/style/screen.css Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1.5) Gecko/20091102 Firefox/3.5.5 0 2349 200 OK - - - (empty) - - - - - - -1258577885.394612 UWkUyAuUGXf 192.168.1.104 1673 63.245.209.11 80 5 GET www.mozilla.org /projects/calendar/images/header-sunbird.png http://www.mozilla.org/projects/calendar/calendar.css Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1.5) Gecko/20091102 Firefox/3.5.5 0 27579 200 OK - - - (empty) - - - - - - -#close 2013-05-21-21-11-25 +#open 2013-07-25-19-43-06 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied orig_fuids orig_mime_types resp_fuids resp_mime_types +#types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] vector[string] vector[string] vector[string] vector[string] +1258577884.844956 UWkUyAuUGXf 192.168.1.104 1673 63.245.209.11 80 1 GET www.mozilla.org /style/enhanced.css http://www.mozilla.org/projects/calendar/ Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1.5) Gecko/20091102 Firefox/3.5.5 0 2675 200 OK - - - (empty) - - - - - XRu8VItOvLc text/plain +1258577884.960135 UWkUyAuUGXf 192.168.1.104 1673 63.245.209.11 80 2 GET www.mozilla.org /script/urchin.js http://www.mozilla.org/projects/calendar/ Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1.5) Gecko/20091102 Firefox/3.5.5 0 21421 200 OK - - - (empty) - - - - - m1D1wMxW9y8 text/plain +1258577885.317160 UWkUyAuUGXf 192.168.1.104 1673 63.245.209.11 80 3 GET www.mozilla.org /images/template/screen/bullet_utility.png http://www.mozilla.org/style/screen.css Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1.5) Gecko/20091102 Firefox/3.5.5 0 94 200 OK - - - (empty) - - - - - ZwnCaxWANNb image/gif +1258577885.349639 UWkUyAuUGXf 192.168.1.104 1673 63.245.209.11 80 4 GET www.mozilla.org /images/template/screen/key-point-top.png http://www.mozilla.org/style/screen.css Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1.5) Gecko/20091102 Firefox/3.5.5 0 2349 200 OK - - - (empty) - - - - - 3WVi9g0Caei image/png +1258577885.394612 UWkUyAuUGXf 192.168.1.104 1673 63.245.209.11 80 5 GET www.mozilla.org /projects/calendar/images/header-sunbird.png http://www.mozilla.org/projects/calendar/calendar.css Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1.5) Gecko/20091102 Firefox/3.5.5 0 27579 200 OK - - - (empty) - - - - - ta9bGBff1Wl image/png +#close 2013-07-25-19-43-06 diff --git a/testing/btest/Baseline/scripts.base.protocols.http.multipart-extract/http.log b/testing/btest/Baseline/scripts.base.protocols.http.multipart-extract/http.log index 0bd15badef..ae71680dfa 100644 --- a/testing/btest/Baseline/scripts.base.protocols.http.multipart-extract/http.log +++ b/testing/btest/Baseline/scripts.base.protocols.http.multipart-extract/http.log @@ -3,8 +3,8 @@ #empty_field (empty) #unset_field - #path http -#open 2013-06-07-19-57-15 
-#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied mime_type md5 extracted_request_files extracted_response_files -#types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] string string vector[string] vector[string] -1369159408.455878 UWkUyAuUGXf 141.142.228.5 57262 54.243.88.146 80 1 POST httpbin.org /post - curl/7.30.0 370 465 200 OK - - - (empty) - - - text/plain - http-item-lcf92jVphSl.dat,http-item-z8gOS6arddh.dat,http-item-tBYz7eElzTb.dat http-item-GVJrSB2Vxk6.dat -#close 2013-06-07-19-57-15 +#open 2013-07-25-19-50-23 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied orig_fuids orig_mime_types resp_fuids resp_mime_types +#types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] vector[string] vector[string] vector[string] vector[string] +1369159408.455878 UWkUyAuUGXf 141.142.228.5 57262 54.243.88.146 80 1 POST httpbin.org /post - curl/7.30.0 370 465 200 OK - - - (empty) - - - UB09X6VFGTd,wFP689pOsIa,g5yDIGBH4i5 text/plain,text/plain,text/plain yv4qm3EsdOc text/plain +#close 2013-07-25-19-50-23 diff --git a/testing/btest/Baseline/scripts.base.protocols.irc.basic/irc.log b/testing/btest/Baseline/scripts.base.protocols.irc.basic/irc.log index 64bdb41861..8249c94938 100644 --- a/testing/btest/Baseline/scripts.base.protocols.irc.basic/irc.log +++ b/testing/btest/Baseline/scripts.base.protocols.irc.basic/irc.log @@ -3,11 +3,11 @@ #empty_field (empty) #unset_field - #path irc -#open 2013-03-27-18-51-40 -#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p nick user command value addl dcc_file_name dcc_file_size extraction_file +#open 2013-07-25-19-51-43 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p nick user command value addl dcc_file_name dcc_file_size fuid #types time string addr port addr port string string string string string string count string 1311189164.119437 UWkUyAuUGXf 192.168.1.77 57640 66.198.80.67 6667 - - NICK bloed - - - - 1311189164.119437 UWkUyAuUGXf 192.168.1.77 57640 66.198.80.67 6667 bloed - USER sdkfje sdkfje Montreal.QC.CA.Undernet.org dkdkrwq - - - 1311189174.474127 UWkUyAuUGXf 192.168.1.77 57640 66.198.80.67 6667 bloed sdkfje JOIN #easymovies (empty) - - - 1311189316.326025 UWkUyAuUGXf 192.168.1.77 57640 66.198.80.67 6667 bloed sdkfje DCC #easymovies (empty) ladyvampress-default(2011-07-07)-OS.zip 42208 - -#close 2013-03-27-18-51-40 +#close 2013-07-25-19-51-43 diff --git a/testing/btest/Baseline/scripts.base.protocols.irc.dcc-extract/irc-dcc-item.dat b/testing/btest/Baseline/scripts.base.protocols.irc.dcc-extract/irc-dcc-item.dat deleted file mode 100644 index d4ec9e374b118f65fbb1f67c14ee1a15a26e58e7..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 42208 zcmV(&K;geoO9KQH000080IopiK0{>y#G<1B069wo04D%_0Bm7od3IrKaB^jHb1h_L zW?^+~bSN?~F)=MLH!UzXDJ@S^E_8TwQ$%P0tN;K200;o*oh`XzWMF9#VPIe|V1NK0 zAZBC*Gg(D}MCT7p)zh*&8`5UIR(ipVTaJ;5fq{u3fq?}iDT0Cw3 z@#jCTYGsv|y{cBut{(bk^&f*-ZSL?FwO!raeWd@b@rldD|Ec`yS5-P&5&5sb`iEP2 z^*@8cXXI*!zw*thblvIOPKI8sdbjEuwJIBFtujK0)gS(eWBvTk)n9-3@%x{@`|10Y 
zs$c%_Cs{db_Y9kAyRy*(7xLOwOpjJm!1l=pzJ^lAj7ytSCtAGFaTPfS{&8iQ-efjVHU}SYV z$hPw;gjH!|XoTAdP!_%Jr40hiyA$BPJm}Txx{bbOZB^*^CkPY@2>xLBfZB5T;D4%?&IMaX?CJ8&Q+rG z+VZ5{Go*Cgh)_1p8PN_wm(qx>2+C^b#fi?pxx0J3CiZZ5C+@C~u5G2Im0+4xSYoTV zxjjUna_#vdN`DGHz7zI?F+AxH3~BGIXeI6$*jYIvRX4AThA@pT;2Plc)DJ+ zhxQ8@!i}uoTXzIwcm^FlDV)fxn3Rrw5Lw+*0m39lk&K>f%NGTrkpl!rMFr} zq6c@9wK|`ul1ED$lohPNZF{6;op+rJP(SSJiL+^soE$_JIaZvAuAf8#BB)}M82@Usq zp>1T8ce0we&M4eHr!{E;*J`)r31n>sdl*uF?l4e&MO!%n%P4QipX4~gx+KVosL3z( z4Xo{Sp#`&kEk0b|X93G2`({!&pS#jWpaJIl1!UKolW4RtuwIRB28sWTZfwnyVlhHt zB>YPix07S#6DL^X;&8l>DNdppJ6I6!Aora9(1(GAw$R3Aj$CdsAbAb#O z%SI_UFj4`aH4@}To#0@|t3k3uZXfGMPB2UD;bX|Ac2duong?dRvj<)F+SyB;;tb6b zhs#pSug%N@dy}pcZpa%I7M&v!X-Cf*y`c9X-y^q46GTo*h`qV zO+pS5=B!dJfdc`*8qRB%IEYFeH0)4k#b5s%nU1`Ld zp1cjkAMH@?M_9<2W+K5zAWve$lAyCNyHM2#E6BFPtc2&w;0+Ta3mFQWQQ|1TL7W7I z()p`kIG{cQCpiyB(3P}6yaWDxMk<&s>}0DU(Dg9wK@+wDxRf{E#Ew!4ez_v;01O~R z2H#eplpV00hh^+Ekp0($hM< zac&vRX*1Y433}xGK;wGR4~6Jq)d87-uWU|a_#_D6c9fVRrY)9V8=`Tr9O?o;dP3MB zDlco7gIW}BbEg3ph}2%6Z;v;JtlVi~NMEo4(Gl9jy4HEGL&C$bywIg2@yjb6BYd9- zU!D^_g+lPwOOua)*o4C;XJrHFos)k?jEHIK9J2lQ^=X+*+TK!9C7STXC*H?6C!YD) zb|)PNbJFB`4goQU*u*Sg1f!HfjmEo|+SD+ez%{c6GM7%ej7?UbT~=jv28^O67)R+7Vp7#aMDBiBtg2_aM9 z4Z+o;3a+Ktv6Y{7`xYw)s0AklS_tCY8=wKyQd;L*Y*HfjIcQSSESY;YE2?Da%A9q* ze_C^DLcq#4a{0j#FIcG9hon|Ma8iw5;gRVwDMYx<0pZt5_Rhb`)bLRyw@-fcwMl*A zOlmiDog(pwXcj^eCVF^3)MG)+y9p?HHFs{>-LaMSrWsj zv?nRw6GS0ruZ}VPFe;rGx^p3f&q3dFl_l49(U+Yl!rR~qTUEN2GEWKr$Y$ol;1Bu$ z*K;l)DVfjPuobyp?v>A^(Gyq?SMQx4Qwoc{5AF6r$m)QYJSI4B0$fCq-G8FQLbT+& zUddDfHd#3<5aPVU{gE%shIVXTq z<&7Ej*y^QA@xu?x0aou2j*6YsK9@h?ndAr3G2JiL&7(Fk8Kg?1F9Y$V>#aJ97NKw2 zTru1lY^6gt^srM}Ye;vc+HS^1B|`bZQq!OA&sGyCr2}eH*HX5vAV2@ z8Z3ZP*Tq~|O0pSr1z*-^yMI}hOgS*ZLch@jNWw4^jz4m`acsaUam=I11fDp02mBXJ zAmO>ZZYGL>$0M|Q^d|W8iVR|j*y3y#uQl~?!zOKK>0DX&?fa1*FP%73lg{1KZC?Qk9giyLnW%N z{<8Tf_4WHFe)Fc#*1?Ad9*m`u0W~p~b>cL8*kCHIA9FgfDI+A9!+LCK!Oe4xA$_UuIjXBt5ilNPapVfjwdYD4j3n`|qt(=@Oexp!3=M|#%pbQBcb}`|93e`}NhgzXe zg7R6^A7NZuXgPB#j+m}RA?xUA5Rad-6xiH8QP|16A7y=U0YkcQ*6R}89J=C7h~OM0%^Ic;`oJ#`FGLb9~G4a;17Q%SyZ7! 
zwDIhnGM1U8Nj5L84jpd1 zC~|_G(pR{X;Ova6XPG(`GLJG3+*mwc_`$l&EpmsA4~?FiE`}_;A@j=8psf6k8nAW| z^--_AIj5)=YDdQ4E|R&tH8jf_31FZsS43D#$pjd5y5{uxm!shS=2 zID6uR+Qro{b#oAbYw0y^s8006r-5apmyBf$idz^tD%GTrS;A+OW+)1k6QTrED4my6 zp=lvJD&5c_dVYt1i7YezzoTLUZ$_7_zqK#kOKP5t`!>3b37q7~xt?}UK0U(xgEPIX z^E}}D(a+t~?r;(Ka;TWaiRIXnOk(ejbB2%;qQPqlnrr1ewl78O$^|&u10B8O@AGoa zX~k>0wF7RO*Gtw}Rt60AQn*P#D%s{QC6SPbVP{EYAD_pF?~|)zJ`(KHUj%8N z54vKDUmLbv{036j!p=TiKpaB&Xa8(o8N zZ@)a!O)fLOL&zJPL>2V>03miX>aPJtzx$QbH$bp{(*8(x;vzu;e^}sM6V9W`n#0*spYG zW!`$Ry#)gYWPU}_75`G&ZgTBu3S zUV7PcaD*i=_BSkyjPW-Q^tkl72|ITaoOSWfS+1-nO}*%le%6*SyQUWA@WnHfG>H$K zNl+%JIkcL94-x4W4j8N&%j^qzPH&PZK(AvnpSi8qLKm|QFOq*v$^BReGE=Jlran!bKRP1-Z1^RKjQ2!bz?b9M!*7o%=Zhj4( zCvs!;{M9?c9QxsI6Y5)NTg~O%_}x?D!@Aj&DR=z{T`WCXI?5nE#fdfByV$HSzCGYf z?h*sp=n;S*#vB(9HqH}ECj>d>I1^_L%rKVCJd9EWA>FGt z<$P$nOVGjPl2~Dg9kw_0iSRS95cmlEqVqnVpF1TQYLBE2DAG{4{5)t%WEL)7g0|Tb zUE(nxB?2g26h{o5yT#!wf~1i+S1%tDT0vC$R%NeCn6vN($EfWNMtQCblXxDc?Nmf7%%Em`5<9esp8vdtdC9Mg#J#`h+(`5pq>M+}V z#fVTG%+@!jsY;(0LGBr=EMVto9?M*&nfbVpFgdt+ud))Cc;6H2+kP%OW}{rAVvI*P z+RKnis*-PE{KL!p%<+Xm{?1u*XY@2|5LY+^?Jk(dqgw9xl+LY&SdKnQAIAwS&Z-jl zH>a0*S4%I@xhVV;xKhK@4N!`cp*$6?*@ry}#DP+1`HmMh^AMenrAEYZF0IY-p_P@A zW2FS*{7LG38jj^cD+3Kq)ez~KiZbhn+la)V!V#MRqs0`}Vk|&UvsXJgn7lNwH9=Y{ zLPJR_dR|Q>Bu%fYUD>;Cn6nRzX>64@^qTG5FEBj>g2fBj1Z~2}f*cjdWF>1E%52NS z`hhJEwfKeR`Vl;cTOu;5^x0%pqHVv`Wh0dl>(H@bUfEGDAY)Ux^sa?~^>YIqgDzM= zW!2|C#!ynvB?TDvO%qzCk|5&Pp@mw-BG-ghLaX}*imIuMdsFAHr8{V8P&WYXTlxV( z;FHW0uDQt=&Qn{G6){aWXX_cQpzJr?1f{!fisacn#3XM`Q>*7r&6p)Sv;`#ZDN{aS z3$qaQ*hI43}mF5tMao2-Of>K0+n;$@Gz!xNAk{Nc4cXB zgI~*7>M0}{m1+K!MaX0XV0}Z?${@S0$#+qb$KozB4*v&&>%5C^ra&LRg*LwBoV1;| zs$tK^V!t@l6lyRITyLp$OzND!NK7pD^ZjZJcgJBeG*ME^mS*M4j?w`%FXHm`#hj3` z;R;QEIp~Z7;-;RIu|iil_Z)Wx33Mm52uC;bpoPuNT~Lgub*P`F>e)(Mk)97cq3NPbuB)*!CQ5Hr1(Y7=mVQmn^L{;>C*W4<6_`o z33G~&>n+D5s`PZ>858kHl=BbV+8alTj%%a4JP-R%?ffyJa6JUBoeA*R#7h`#h{JnJ zqvT9GbfOhG43wOpG1A$BW1@)>!47~@oI9NguGtDcy$mfyFEoCYBA+DYCqu>A6O^v+ z8=8lmKQ}khO76$r-}BQKJJx^mgt((n4^uB^TXDFtDTo5JrO}Jj;~1nlybcAJpc%mi zPlyJEeb-MSxFbwpNKqG?(fDk=sDSt0@|q{Ew&4hodM={h$tN5b_FRnB7iX(BGFS$xL z*LwRt#hCwV?_G8!Ns=tFeFOauF$;EP109C&x8DZ1XmSQM$-w}`G}-7y0i?RQy1T}; zhnGFBUytwQieEuFqH6v!5jC@0&0P(qvoa%7#Ld)HMdfki$blS+<^=!g3mGBbz}ezN zE@Ea>u0(jklj6wDbgH28?l*uSd23{X3M3M4&i^amcl7!26OaHz8$vsZ9bl}YtZEx~ zFCyfL%&w1n_E=_F8=|-0_wM5pnIv2NBX zinvcE!F#U6Oj-v(UGTdDd=?5g?nZ_X#wHZO>pjGC>tgr$O&9%BIS~7c0kNm@+s9GB zu@@-^&{<-G^i8s<+-@k0QGPi$6<6cFX5&z$#V3TTJ9u9yTL?w9{g{-1Y9q^?$qiZB z)Sd@ZYGRu^k6;RPQ+0+;!Y^&;GZ1@cQ=rJDUw~rR!Q!;_qD^-ZqlUlpWRR{NPJbyG z15YZshv)PWy*HJ{43C|tOq1Rb!u^PCt1^4~V0a+ABj(Up`W>G?jowjW!G(o0lhdLh zV#o(~kYZA6X7dF%DrS2XUr15~E$4&rTWAFELB#AZO{VqMb#lJR3wL4E7a+5KaD|DZ zXHl>7ZFKBd6o&L zQJ+Be7QV;hyoidXZgv3sG3A?h9WQEURF+jc*FkS-LRn@{jnFFDKJ@Fz!n$u-rE~O@lO;MKCBn z8+D_13Zs0@LxyT}$Z>JpAjZ8_;+LtPqO{pc?{Rgdq#M-_vDC*ESC)02|ei z8z?4YF7B79_MCpd7(X?b0@Kuvs+cGf{la~9DdUYmiaoxR6&iGpAQeJ2YCp)RASq#f zPFkBHStx=(R^q;Cce`J7FQGOwTrd*d!%GW?4qxMBAFywu!St%UR8f)M{rX{}zK-&{ zpWks_%gS+8pX~;Ri>kXK92(DfT*S_ZVk>_k75YGUDa=kC&f9SRR&czD8}y8 z%!&@=8$@bk9xaN4NUm)zx5M0rz&(n0HwRoVhyl$dTVZ1S*WeW%!^(|y3SI@1+ zSdBXR;G!39(lWb-8e3%fBp%>B1$p#$Z3T)Q8(%bq%DK&&j*KB9D!sKQdV;G=u`)$K zhpsq($HS_xBNpvEQ$TPIS+R3BEpoP2($X;_H+JA5#Kgl*{KJ)*Z&^}*U!izzIU(B& zY+>oQA9$S~_ON`HZ$&F<3dd|o(H9pZFLMTBv#ia)l1r~#`OaEyR@p~?Tss3*|v}j9W5!p6ZmRO|u8?F+Kg3z?(1 zF%+?g7=5*)Gg}^qK8Q$ZJ#YnzQ4mQT-^=%h`2s>~t<0!vYRfcrp~p7-s(59|B;HJ5 z#BJ4}$HKUc>R{6NOURls5X_e6TDvf%O<0ykd&RZAm~JglQ>UYS==zBxmxQ$DaGL$y z11wsIDkKLf_PEm-#P z-f-C`pWi3>_%$JBR8tH@cfNllA95Q-`SLyPV~ItH^h6<(OEce84IYB~U}RO*R%CVa 
zq`Zg3cJ=hgmz3B`0mkxB#sLE?ZNRKFe%57uGKiq!CqOjfDUeK_C~Epeo1zuNql)jK z6m|W?WET&ei~7k*rn?iYN;1_A#HP5aB;Dq__QwAa*>=*lFN+$Ze$y|>R)wv)>rBMC zOZ()yE34xzX{u~7v5UjsC&@3k!;68lB9h{g>wF+zu8Loy3ZpJN^aYvsGQK-#cS^LI zkgu#A^4%oHOfPz_koh0@OxlxkdM0|P7aDCWPg0q@Iri4XrNm?Csabb69wdn8z>9)R z6oi6bi1|9A+f)pZm9q3*D7vYX#NbmsToo0^8#0-0$WTspHTL%4y7sRv-u$(iicf#0HOGZJU~Oe4WQRd>aSI?4r|un^t*og6+$n^lF-03O zuS|*#?$}lXfq)Soow_xIrVb;c#sfOdf zD4&$F1>|~fj>iF?MC3++cq~_s>$c)S^{HHSJ8VF+69BHislUC#xMKa_Dsg^4EaAiA`W)dz+%Mm=|7(1V6tW#t6 zOmsQm!HbDCi@|L+u&vlVzIij~U`UTZK4fg4XlqVZ%wz!1$Ipl27BQc5a_i}%-8OW{ zBqg5|^wnqkmCT6!Y$|$(d(~&!^9M@k26adJlj~5nSvV;NppdJfrI!Rzb-7V#PKy~1 zyi+H;l74U!lU4j)IkIW{e~g|rF<3(#g1boIXT{T^;+)?Xc{R--{&ZyzOyU*N;YU;L z2h-WocV~b^O*iO3)FgT!2N&3YnLfrM!$samUKYyDB%VBakBXtwTq-)0Rr8f6Da)+B z5eY>0pF~y8l-2wQwW+{zYbpxAvrOh~ly)IltFvo-{TI^3c%X}A=c+Fp_)v=n_x@0p zLXiGb^1mnlsvjANaG7^l-O;!d9bwQkKLXs~bsn0Q`x`{WYTjgOx`*4;=a#apyzcpa zL)&0kG*uakvotnnCd2MXa;8PH82m8cN{L$)?f@X*Oi~s1q@=|_G45>3Gg#Wq4cIZ+ zFJiV4fTD3@h3SgZoX}(Ce*FUABFtIsf6b*#hFxBmNja9>RNRZ>HSemp(dS)?l*8me zmH-cqu?b|xU`w#m-doyxiLKEk=^NCJtzjDMyaZ*~^tu2yxOK#l)3WgpHFV9IUthRr znX$R-iZU>kWI)JXUnN7gkU5|0M+e}5G2a~m`C!&)GYY0eTEIYceFyvK z&$L*|l=N-BqjS)BpFMu2R-&P3i{Xe^+BbnIX4;l<*ys%#{dbV2nt@wb<84lub{8Ln ze9tR7%<52~FOiPndTrh2`GB1jyUyIs#oZY(s|4z6&;t#H=+X}!2TzYgI-Z{~Z|YpS z0VsxuZUJWydGBU0-Vw>B4?}Pq1r|ynvMF3+D%VB}TXNo?{<{EaoVHuQtjP!UC#WPo zJnzjIo2Tgt69>9h2~XRBE<^KvVoOkj-rD%ylhk`Yif$xssHz5+^)@JKn=~Ke9Q&Vi zTl$OIvu2r7;5V5TVf<|KGyAICG%y)`0Pz7EBi>E8sd26{3$C!F>tG`vrfA$(jMlq# z&5+d+j@rBZpfL3Wuixc~Nsc^!qnmv;uqDL`7;~1_o-k|j9+ZS-e2rkI0w7~3&7ydI zLHZosM4GG*xv(9>r#rihT!_rZ^ABno%l7Uy{c^uNWprdeOCQ2-osQ{50(FTj@n%2ZHprW{grj}5L*+jC z_##8%?=9Z-w+8NMiCR{}J=gb}THJm@pKu>g7)WOsNSj&b<^%)AbMGJ4s-vq6`woL4SlK!UrBfwB!#cctrC`Xu&~0p!dTwKD_$oO}X;iX$17x*0@i3f41sNO^D{ z%J@7m^X%_XVsJdBav&?l+$MX{%hg z5U7)V$Lw8Kc!&{tBCnaNtQzAxZk3M3S&vOS(WJauv`UX(oT8Q5H&@DYXLD{PHL>Blnf z!pQZeY6rt{aWuh)$2QjkfIE3A!8OGo)WYn`j@n{!6yxbx+dBCuug3G|d8!Hu=f)(K zq`3uw@zqa}gP|IzuE`JPtLY_knw+r9Jg{+c&WwiwI39wK-j2zXgIBiAy{JC9Ohj&h zsmr`@aW_%isb_PaF%-R{B>wZjOoReY9DAgAZl0ZqKAk;4(a=>REZmvmOmjvZ!G7|e z=+3<;T``|2Xx!+#nNstUrDrD#TV~1Sjh*VH4+QchR%E$jp>-HEpko=`BAF~nij|)p*amQWKL0IUrrkyaH-&rEjaVo=D>&rwZca@%01kx05_LOI$*eDCpa>zRPU{m7FshSF^i>rbTA$(_ABtax*(0~~$0%o% zC9*HH#?()!3fXs*UhMr!HejGPT|823{w^kakJpp5i64k2pMO;*nv%AVBs((GS!ZpI z9u~BeAPw@E4ch0c}7`*=9)|g?sCg+@G+w>1$8aiY}@D))zQuL%nqh*swldlR7<>QCMzd z?g?mOn$u7_p*)uY;=lp=9C;Yg5joFm`CK(FBd59?*wo_l`=Kj?El0B5LJl#s!3?-| z;zSY@mQx!|I;hicD%1%B@nK=>p~Y7xv4jaY6O6SNyC8ZQfO9F2-jes)#ZU1Pvvj)* zf!*{OEV%CnH@bF%xAoZ;12s{lfs`(9a+UYpc-XB51d%vpO`L!!!alrsW7^-LjiF5k zNmGttDCj%2^)8;qjm;>@&L^|uZN`9_$tHr8cv7KrpgoN|g{GhhB+e>^M5PY)3`So$ z-z<$eicLxOG5K)SG5Y)wI&)ybL;p|>z6E-m^Yy0*gD9rmXRHtNfG&?VPY&R;NhT*U zJJmoJlgjT-C$RRTC!$|OWfOm95GhQBJGbA5xrF;JW*QapbVGTb2FXmz{IH6)D4b;wCmgNR zne5g|$gHcv6Jda$Hs!|;VBMG1JA0{>tA|5p<8P$g;?OJPqF-nj|+7?dYqDzqwkADNjFA2-E4x^!>$c zlT#LoRn?V@oZIN<3eh#Tci^?)PNI0yP8+3TewnJ}>Cr!J`UO5vKMJTY$7)G>+*b{fdrU3RVodO$@Xp7L6aJI*v@WX@17F z^FHU{}5*j;X`;yu81^ zJLLXArB=+9vaZhqW{`VA>*1WM{!TGHk_}Vdtpi-(*whTu`_lD{%6mCTz@^Th1k>hA zj&L;ER+X0q9Vqs*dbkPtpHlnL))nzcRh`#K2ERsvZIBX9O@yG)@{INNDStRH9CMga z88G!j)Yf1K>LAijF@Irb5nm)6OATT@l@D^Cegu|4lSbj+u@PMkc8m$ggJEpITi_G# z7++Qo#APB5G$cIC1IS7K{K_)%ykD-|)^z@?Y2!#>)d(~kl3*0lK$S|3j647j5k+A? 
z8}f(0(nsz`S%HM4FzySWIS<`KM=`Tn3@|J!<#<19D$9_P*qLfaU**7%*MG*` zo9YJ<51RvIml-24gJBSMBd!Prtsz@H1S(WED8$jR03fvItw03%1?RXtTJ|Q)5zeKb zK;Dg+8pMyt4~GFs)j-Wco{d_5#?#MoJHXQvv-^Cy=^uskGy~{g*|3x4tuYsh9wJ6x ztKBaUuN_$k7j2_Z9~-|ZAJ0n-TBN_W4ICS&^%8nnz|C@ znRY;P@^jq2Am7@Du3Ar>O`(03T`xZ0hfb(3aL_hwQP?c0%pibQ`ap~cBiS+yU( zxU84=luV~QGg+3FoKfjWUP_LD2TCBK1NMM^@4P{dRj#Sh;H9{iZPB1Z+w2DN&SK^I z!({K*s@sg>6eC#*$tPxBKMsS+uIySb}or6)esCDEKDq$2Y!gp zhDWIlRN$q-VWz^R>ghpvDS$JXp2`BKEX;a|2WQfRIh&=Rabhl~s-!5(CO>WQ9*^|2 z#f6`+FKu1;Ix{om#I*>Ym$mprFSiwZ*kiQ5s_YV0!_-?|myC7QhSKpcz){k0%*&m3aEu2Im}_ad2((%)$6OOJ4wl zA!%jk&YmrN`)qfFLhbnr%jgFp_HUd~nQ-KHhZ%i@S7E-kvOuDgVjSk(#B2O2Pozy} zDejM@oB03wlkTSRcngE^CO^URN+81dzuZ3AyI+sxiyzD6M{m_`Y~sbCd@Od)B;ee? z+NM1x_R~K8mrHE~;8$PfuUt<0=ReuvuV2;M^E|JCSO2;Qs5$?);Z^9roChy5;JF%l z=GRBb77Tj)e0iApoUW1Ahv8o~+~x7-=RbeeIs4(Sxe(-Uv?C~zD$K}I)1^|@ldRZm z4a1YZv-w&p*Yi@u4J{ISUT`(Geyo`KxT3{az}MD)QqC+deI_yXV`6}~2}IhFwGpnB zEr7`UYYLcV_G2z~QkWfTR=${bNoH?v)BGMDHdUWMEy`etzqGg&>25{0`n9ZH8AlaZ z4M+_-aDf`m3jvGCkUhU9o%0!4YH2hd{B?Tg?Q)Wj=Ea<&|JoKuCb8}mttJI~ogx?V zC}{mm$-_ZzfRs%OBFwo=O?EvurQ)G#(pYm`sOJ)>{0J-)o>US2=8{Nl$$%hjG; z3KY*b}2Jpo%^lvU;G??{`XRmr|u*R7mscog;-0kpl) zO$`9-sJi=;2C@w)#c{B^*7Q|uX7WtBM-@Cq_)^j#%Tv0n_v(yrdCuIUs*Vi^UGMBL z_MxjA<%2}E%)^*lGP(>8d(pWRGYtbe7vqvyPg;B`Cu@vfcX^@SW@5%7C2z`lhF6UB z^YBT!X=%y90yxS&NXuPi;0UZ=H``(<&KdG!$jVF77OGTPAJNfnEFwvWGtN-r2G@&vM#R^_ zt$A@#7han8bC+Xp-$N+{ZE~8i_I-pzu&x*0p-yiEW zbhW{E#k^%(MI!=v8qSku<)LOG1o8mf4~*L(^hC@P{-gQr4@Mb@n;MVX_LA}|EJgHz zsGupY?dP>trL}eGa*f-v&Q8BEjsGC*_szcBcd#voMBc;TJKU;JNQDD9A?=NwyXSSgm zB)M2~=KO}b4J7XPCt1aZP~7`xS>=9KE}CcANs=%xw6OKrXjv15*6Y^8lpLPieh?M?#ACqnzFQm%IG=^TrB|Af&tWv#IWIWKl$ zzR^d=k8O#g;vkmAZ~j5wY!iWNN~~o!SqN`` zOo~X=%7}fH`XfwnER%l#8^=kzeB|aeOMz8p zFo_z;wC_0>ZQZW!?R>5EnBKz`97{(Ehf@2+gP=>;t;x|lBt$PWxeN1AfqQ~(=LSVn z+t0g+6p&o;F~vKD7rW^<8xusZPd33#@%#m*LRh24#cGcf_smZ@IbO&U7hrD5b>J

    o&!pvowVs zbr_et92a%?{!M1Hjzs^;%5V=<*!TLE)r3JF<%^aFTNO&pDbg-Vo2zWo6Ivg%a{NrC zg9wec*hrhV8}y<>UW|7kIw%2qXur?iR@;AkeS?*JsLFijx&NjFM@;vh6rqLWTIoDa1hF?laO zh{g6kZ>eXHGUJNTp0ang{`2dxh!OBxiTh( z;pBQb&SN$g=SN%~u&h(AHj`qimfc5y!mEYo{=?S8R-u(Jj?@rIz{ND29aC4Ds^K2N?YYwk^KZuHp2azz1Mc0 zNH($qX$q&jc*3c=X+^*?U(j?`j#(Lle~2(?Ak6pr{k#=9MR#TS?$jKwT+vO zd1hFjLDmnXa145yxSf{5Za<^88v&rI9P^euv7t^d#IoTDj&RFLwZUZUleTBB@Xe^m20aFld!WFK^m|&LA?7*5+B0W#rji7Y0ja zWm)%e2Vw-3W-VrltDi?KaB>Sk0SpU&YPz=z!DACMD=B?FQ;|$NlpvLatCD^i9fEtL z6Vz`e+oV@v{mvBc9k$i}z;;9JWRoI0bfMG_0;NNXsbBF{Gcmy1^Pr#{+}-AvdTWYN z_P*WQe_{>+x-)ak=u(f442W;v{Z)4|zkUJD{M_6iuf#?i5y(?18uImhU>ruFtLq>f zzZVTZjoRO}6_4+We3<+&C1V#f1Ud#&<&oQ87q1-r2hT$_#ry zrCd)!r?9`g`(Ii@Vz-5FDA;#{oqy_u)wziK3`o@!E>-cJ+p%4Z^d4!=%36Ri%H%{r zPgsPXZE@7v)-nW0Mbx=dSuulGPl zpBgeUWfdk*M@S}wCBQ7YqULen7dy0<1o<2pBwB{t6erU(dh@wBi7q!k%Ud2vDohF2 z*^|tuYsgES&9zqpR)3=CTm_5*iMJFfFIGH_T2OKZ;>OCcfswKS)v@B}uhyd1nA78C z@E8m8aYfNiYE=*pPmazWPF*sqvzD(|iMhF(@R{5jwJCc&6X}>>sgdmm{`%Wk+~1>+ z?g~kd>d1F=_7EmsPw`<9Nnq47sw2#heA^VGY02^`Ik?a7PBWo%d3h@y5T2HPQgGyW zODn|_JK9~t%$u|!B48y+oVn6RAxESqhv!k(|2I8?X)&=^IS1SILNYD4$^d!2=duLW z)$GXu<|%z2QoJmDHc3h0atuS`a?0xt7lC$ylASrmy(-0Ki>fM{T@&AB_p&8cVsb)I zvS*}<`6VrjU?`#*ddF}r?`J`YZL9KZfsr$*@0U`XDUl{uPKTy+hh~ydFgPr`9R1iDM6!3ZL1ICC&tG~!X#|` z>7V{o?*9`INfB!o5oDr24w`j}r?<5TfB}5vklBmRl(PFl5mZ^?LPz}nVm5z>05+t( z(=ryc2`4Y)%FEoG;!dQPD?8ic@tV}+j~yu{$pPvTdZ5IE3R=yKOh)8O<3kCHb>=SD zftyKTia(dt`}H@~w-WP-olH1rp8Ve^G1O`+f-delT+;nLc$Y8$_SNz$FI_j$Or!$8 zJfj2JrCbL*x>R?X#4ZbWbBFV?EgItn4wmQ=SN2@$RY)bg=oTmV(uJ)wdJCbk6Af&O|Udac#RfE|?D5>iuLL;72Rt}NwN z^+!nsvI0q|o8dun@lD?x5-R>dbQ#arD*%j;6bAUPPQhAP1ybzwusUfPHf5f1&*^@UyJTAFQen!ib3est zG8=qNx5>%RI@r165lE2|b6^?phe~!;91{cd*UfV31f}ERrXcU;DF_?PaxjZ?3gyE zh_)X&&qw3q<~n|&Fho&o+Mw9&>{3R2!~sVV%fF?Vp9#dKTzd^C3er=;jYshx)dn`3 z_W@2Bbv&K-0{2|>$vMRqKlz%P*HT`5^?#^?)D9KCo+koV9zva~!wd*$GMkz z{A$|?ngRC!Nsn{Ko<;g0KZPzzE6t-+Pi#Hcc3;sphn*y3|4Gp-ZHPo~ncLl3iWQ~fi~Mdq3@iZ%s<4Q{g|rd7?o zk}AOeCVPBd{DNf8vDN5>zSbL7qh?GdR$6gOr9sE19z8V zfs5R|3`2|cM7{X_43YwQat9wUcsPllcE_XQ0SRfTemfRpCJnX;-hdzy80O& zwU(V;%*6Lys$HiFH~t`_zSLon94W*(`y_6h3`*+t0J3=p{uEPnGtQ^kE#v3-ge+`e zkiCzv8Fda#60k;|eJH-M#J;H9`5$Hw(4~+M#%eB3Xfa`kS%r2nvZD+?)dL>U@i*Nx z)e~(|ioV~!&`xL`=A8~NgqVadX;s#B*JKAeqoMvE>5BrcR{QUX;*7c<&R&QT_hM zAFsXQY7>rW8+T|k58wCUsCp_y`|J@5so7)du9AgU8&F6zjAEUZ`@glW$jC#vYvt%{ zm+X;$Yas$0cDN(KPlkl@Nh*s(uy-#y>Qk${?O5lNy7n@*P8y3c7ovdup?k z2S2p?{nZcgWX_0btwpKvuEebUYd-yJKTCG)KseU+;koD=ogS3O>5~*UGq3BbIKK%W@W8X9nNRs>;nd&hFY1n zl|&{GvgZJt$v?IhUA7c;k5Fow*i%%VuW59ouGaL${S0@5IBp%=>dw=H&S5%ohYYR2 zll-c^2j_7HWpLJ&n9km#GLf668O$h3TcNDJM17yRUuI{+)W)!TF+EpAD2p{t`+0`TxJM}wQE8UBqo2rv z;$Ez#B{g+?o?SHn*cHo>2jCaG=~)^AVj=qU2QGtFnv1bnzi$fBEZmzyh3>~%c?JIOATiKBm6+nVkFU$f=oeD`bh#D@k-^}V zg~VaJB}zP38Ndyod#aPVJTzH^Mn>YihBPf(%TZ+T6(*f2))0;-`#0#$p}5diF;wM7 zq3Kp3+tL z9v##ncP`Lh@aq_+Mmvx<)TH00(3npRMdjqTaN0-xeW_WatzF7B0~a@s;;oYV925EY zEPntL*W`H*j%ks_ z{S=K_bFd}kbBCI4v_?8`p$XgloYaX+`0yev5kILCx)$+J>GmG;rf{c-a#!R|$;-Ux z1IOlK_Lcq$I7DPDrHTYr<1UTV+w5IQe%+Pl1FIhN4-xr+c^F}C$8aHBL=6XiF>g{E zqf)tTW$di2xgO4~lHAM`zf^>!roUj`McQ=ppVAZks{sJCNRCe5D!mPn(pg6Vpp#&g z#*;}*uIbejDt4gpLGvNWH}*_>6!XATzWUWnq#E->!#qEk??6*qA{1s^I$ECAb&C0D z=>F>1*f`r$gFzs^a3VvtxsR`@Tef47aasy;2KhY4@L&8>eA{RsM7X_ZRjx&9CIw&M zJ-=O>VHljpF~(+5tK7-D_ZUI!9eG&H-tNp(04(7f#B2#8W#e>XDr)GRwm4@*qg|Q` z7UVXn2;9K<_#Pth_}%*B>l8P7l&=0XJJBKW?B<@G0=KJ4c!)vY3M+f4pbexCTaWNd zcTl;IW{fCMKI6Hq>ee+L!g@Ym3#-8!VZ@dF^q1D^4f^Wh`?h+Xw3A1All7C=X5`0% zN$41vqA2FBApK#uMocfB4Pak2s(c2rd3;eF!Tsp~<}5dS{gM$-qnXV@YyfCL3R`nm zcw6k_SC5J^<)XwT9D@)V@l_DF2$C+`{^XN$Tf2OR#Hg%!JG3=wPPLA@6i)#wXv&Ms 
z=q_INZM8$}Q(jOJo$^~ClMV@ThLB%q|D@$-s4Gt1Ko@U*D_6(iKuR9ra&j0(oVdD6 zyiY6aGDfWC0Y&lXQG&Sf_kI|fxKm>NM{lVbtm6-IrX*fGHBnb58sD?^tsvzhgWam~3&bDGAAV}}sK)p%K zp%`S}T(5_DA!F~tf84cF`x$g(ynh;$K3;xsA$kvY9WJ^3c$iw)@wIe=pI-LTN(Q@t zp#_HXQo;&}jO1bsnw>VFGmj-);V8q+TErJTXz;{qK}UqhT6}?zhc!q~?woTwf6SK1 zz%4~l_fMb*jySi{qy42dxv5+cZ$H*?L`Pe=wpIsUD0m!`?|OX`GHPLGs-1fndK+f7 zr>1VQ81zkH8w%P0fQP2!!k1AA3gmGU2(M z8F28?^yPZ1&}s*DQ|>&*C0uW-wzHFXTP=Dko7WiE;(Ak5;)_PX!}}$i961PO%I*C@Ld|(sV5lT;K03jbFpi?s-XMMW zM~OfF7-y5fugMbigNOBeb&5~DU5V2Q9@JqUUVsf8V1O3syl+~{@?6tS#%=1p(1`P( ziSNAoKe}!VOtHV^2M*R|IwEW#dmE2GQ3fxlPJ2Xa>y2o~QeOGqT`vBY99B$wdw8l1 zT2~@Wi(Kzzp>Z!y>9v|G24ioN6*Sm z_BZa1slJip^-s_L@>SoPn0tHH=QsrSq@25}jcc4Q#A@z~#)e`_dP@wxteS6ii^kl6 zCmr+VuK5j_YLs5Tbs77C$DND0EBwVU#*dvbQKN%`CHHh%&Z(l7AYmSq~IqG;Em6AR%6Rm=ui)Iq#e(i$kjoNVrYzn zs^>EVjIO^Yg%vvZjC99~L^4Mns4742R_q2iWNnS1H0Gl5U@2$wK$gTaV)&f~h`{&% zJm7ju3x@s=?lM!ExO7SMSHoHNBeCudBe#{w8LGKhV}8)Jc=Va%FSwbDJLaxK$?AzX z6VR38ydO-FVjb+OQCqx;6CsYKVq7i-t59*Ubv*D{N*8#i#6LmT>6f6R0Dl778SrgI zEt)v@gORJdwr+GP+J#I3{wS!nanHmra4Z-%#y3S(Vp0>EEZ+x;lD{s zQv$7b zsU5{QEJX^6(0OX_e$bTD4hP|ZoV-2n%1Cxp4AOs^^!DN`HEQ|1{uP2@fAeSTLMesUg6>5hUMfa~g7Klb`*f(^f_vy`$md&?aZHaprqx zI66FZh)52Y&!b;YGugR(QazA`7opwzyXB=&H*b=MpoJLzcsF3 z@Y~Kr{3+vSId4zr^lVdb(%XH6)p;X&i4STVCf@E0c$9gji>ziPuKJ@Imz)fJ1=KGZ zqfMFCFxox@%;~bz{R=qg(u!c~(ov@uXu{CvDT!G@91+o^r|+w1N;O!JvB@R}v*Q2| zQl!A=B@4{yszWEC(7VMZlb4Kh&~yCAsnwWmG}QQ=aw3GX*!1VJJk{Y}WJs&K*nnf{ z&99u_pX3~sXCCh2@z35${Eyce>dL!6imKkA*yBQi#}-~BgGJeqK(BNYuOf)q@3U=~ z`oHfOi8s^Pk!^2$raYP?mr=>CDAV-hN<44liZ&-p9-j+Yc#Up^ZHuh%ZQ&YL! z4lM{gGB!6T%>jiGX;%^-P0^|jaaEYbnLQB|M!{?^mSQA9E}bDSr!l=O&PHsIKhw^+ zs$@zcs^eV2d(C~bPLzW{e!?elxppbCX7vT7;-yGOO(wr=2bmAV+--%O;Cy^~u~r?m zEb?I90U_-5KgODq`{#nLrcBCTKt$EJ*|gg?fGc~z#nuBq%Joz8C!o>;Ipufr!MAg zp1%&&R;YIG8j|_<+kMv-V98JiH!C0JL09J|BF1#V|B0D$8HL~1$Md8yUnAZmER#`J z?_8Rml=7n(&8i%LGXFG7OdR{?h1jz`x#|l8t!fIo&j^v;ZBd|-&jent86@!%SIymM(j`!4b;h&zL39|Iy^O zL$r)=qiyJQ@m(M>HFD=lrv|R-j^*<6@+DrBUaTproXLS=M5z*7oe>uu9tt4Kohjl_ z4)JztO!Z6UER``5oP;KKHv*E0^k^7>WyxD;q=%fv6(%i z#4;T(W0l^q!`uacVk=A7KeapdQxjq456igYCSoO4<^?n;s$ zdabTK#efw#_q(hIfd%Qb^2L9eU%#F^Abu|_*wsBGawLV@4J5O(U?kd|QcRO4D3*GG9%b_Wrd#N!q-u;U08RgUmPS1Ud zn(}{+V1Shee3n<+=(J9hXZb02k#ByYFKc~u)%Or!a*#F|WxArsawx`EQ52carzj$S z4AYlT_!ULp^t^rmJPTFQddwJ=iJ-M zyCNHolJBlQzeKw*FsIq+W_R3FgK^-+^G9zczV7}?}-L7hmAXqv$>DGI)DFnfBNS?pUSE#t3N;g`STCYW(oa& ze``0>V4LJuF7Q|iZHrRaSp@2#@-SlBh4`V@>vh-BK!@{pc)op8U75WJ%-k64-k0@jwNj4NH=z65YWd7TT{$oXkb`P4y z7}`_nx6^f=&^+0wC=rd)UQt-02=xz1fjDY1r-h=lEx;Ug8o>RDHD*;~6{#3ZdEtv= z`mx}shtBBfyUFDy2BH-?+z+6NJi&9p7-^4e+PTOqMWr7Qn>2*&{CZOyH80IW7)~+h zzhpsj*IfOajIf}%7gv$PEq#^3U>t+-7`<=h;o#Bc!Xrp)-$_ro%R8Yu1 zi-N>^S)Xh+PFUm`eNBFG@H{~c(MiB)%Z3p=`#ZJJ2iGJfZved-olWHgA z<8m`FDo^V}V;wab^rk1UN!V3BzmMRC(Kdk2oy;DK_=2^ViABLj9p|e23HlHDtFJ|L z8*)4KgBz>hFh#tcKNm52TJ3AI;NI3Lh1Ff!dg51S6?sYV=+L}-^#Ph{GQ5BVYo@lG zHdTDb_{u?E2&sKn1C6X^;_Zl$0QMTEfZ57kRq*N2Wu3O!YbUPW!{~}3r&wz>tw8D8 zlt?N7nI6*l`1gH$=4QJP*T?rCa&eU8{imHAJ?z>zYochdQ}Q&gs*}B1i#gJpkebg} ziCN^-oE!Jyx*^&FgFR)+dehJ~m+yw6D~S@`=0KTTIi`qAhSlZ?oD9>Na1nFbkTkYS zHR9!~2C>n&?;j;K(Tzw7^DL^!ZWnTI>!a)khUn}Ay(rlYErYWV59Fh&4}UukDVizs z7+eUC*}SfB1s5OMYR_uJSG9fg+DX zA%2y6XVU33;H*S3TC0T!iQw?1>nwZ_I5llFkq!{9aWoH$U3goihIF);B! 
zbY@v=Q4X-2iw$`!TDy$LG{kQ=am)vbtX8XUFXbAGA27na?jD?jsBz-F*S+PTLX96; zKgBjaOaAf4^pxb@lK<>)l~ENHj_~ls-SGr(ZIp}0LbN3{%dhy$x5+8D!!ViAY)@BL zTkQr}Fr=9s{4p4TIO%IX$T|PN0CO2_bc-@`^HGaSD755-fNfzu#!(IzMwoyA3WL=kH^irlwNJ8=jjABur*x8=MFq)M z(bY;EzK!-^w9Ot{m(fii)`qlpSH^eT$4TlaVYz?gfS|Tfyu&w{J_Sj~{S_6;pNwRG zjbj#aA1t@>;u&WP(A2$pqH{4d*%RvzCRg6;Rjwzv5?LmCf**m|5?`16igVNJ7TqkC zV;Cqwi$}He&LD)ErNZr`Vf3H-U$d?CcffV!va;MX%(z7BPz~w{PqB zq(LOWHYxOy$v-bo)|J%Elg56UI=Q7}G#+5(B9nA3LKUMpdM9IVT3R!mNKv%nte7YB z_D_zqv@KwyM}ywt|1e_aaM1l4|bQjaH-+Z+E2MO zy%$oS+*vRBOTE}GCtFp&+iYmgni#_zJ=Uu(C{ok!OR>`MEb1OW*rP7F7jeh@cHjMF zB_BzxrMqlS?%imsCWiS(x?bVyp9iR`F3T_~44<#xKpnklZ!DNKu; zIG(ASY<8>|V;NTJl%Z+-X*m)Gpcdts^-K7x#SV$&ys5~ zS$JZ0*Kq?nM7%Q30VWxgJv54UF+I=;cFwc-ql>wiw>w!2Hprj+KRqa>oX~{b+l8UM z5!9{pT}YCgh}u@)0d)J;>vGx1JK#lZ{+%B|fb1$AKd%ZFtl(Q&eB(-S&yDt~l7Lc-_%`ZVQ*1Nz-A?D3qHEh1aaqjyl$~kG?ARd!8~*_;})Uz9~vH(gbw9XW3*`3UNbycKe~4vdGdD zkP~^?`UFpI_PR5+m7SBIqkD#njbrb`pod8mz!3IccWv-VotbOYO;fITx;@T#5KeD% zhxeq7-WQL!dV*Qtp@6BW&OH3P4#Qp=EjPd3%9Iu_T6~9yf~VHrM9{$)A3fbKeBM_r zp=LRamgi@A9l$0^AF@S#*6g%-)k=c+XIq&XlhSzfRfM`oa3)@4Ju?r_iO_BqkyPYa z$u#z)ccaiRC?)=PfRMmP|LfPJWr7^X4VCkDUCrT(?PTwTJJ2TJDQpVMjKr9M*`$CQ zjmVJ++bFa2zbQOSd{_7JN=CGj8So#)L^)+QJjprjzLWSx9t1ylTHd)o7-u^Zi76gr zKW+_aj6dmJ@FFA2UD}6<4D>QyMObTbIK9!zYS2-HNG79t}zRZ(B8D6}=-sXKiq% z12A%m$uA7>s9edOT+1kK?2)&vs+u&hD(4l{@1cFMzr~juc_=32c2xvYC|<0^H9w!? zH5)PVBJUp*B%5hWIV=V1XL9G|w+{?Tk@Slay#;;lF~?jMhs>p6*Nj#!oI?7~!-lLW zsy0JlGKoWE9x+u)PP3aTmZBB4KM6x$+c zreaSG=2OQ2)e3g&Ma3enLaevEs8C-(I%E*NCP>(eQ^=$ZpSecuCR;5Q zXL8U~w-`C@rtz}Y6rs^o9S=WbUJfR0Vq6Bd997qIqHs+;cmr-PmcH}{;iTdqQ9u3cOU~k7P3wt=IEppKxeo>%? zr!z7C!yC-%!`ang+7N{3?Pivr-dvgn0WGFI!QI(*%JmR0I%WNS9lDSmF!@IQ5mR;7J_b4CWOcem8QtUN4} z{Cs?TczGb4AUqd@W%^n{c=6W=FZ;r?`AU6fFkpm^#fgMOOfgpu)cjS3BoD>3x=o2t zt1(XvAeb8o*{d995+Z}C{+z~j$Se`g7%UR20_ zIZQ&2QA4Q)mAiwdmE}3XSz$CzdhG+$Bg^$5Dbdo>@TKR1q-x!|*OZ`FX={3o(uHjIANd1SHQCcefqT6{47uH02^L4hMbnNki^PPR_1(0xu!nF&{fi7 zS2izopxbICKW!&Pb{{lO^@)j?I6ZMPm)D_27^M@{65u9HNwmZ=aP;_=f! 
zUx^aKTuu}@81T{??)6F8h)l5p7xLXvqd|Fd9!3if@qLQE^rbQ`5CK*t1w!vb`DkI8 zFI5SDi+w!(L<%0ugJAsSA4yjG|L^br?!W%?pUHdxs=EKSyJEPuWfFd8>pu_4K0ngp zN0PguZ9p_fcJjm7izkiavD##-DO#*Ggv&ziFWh2qWLfCr1!Wq)d$8)jlv@-qvypm; zOd#43S9^bOatjn1ooQpxvu7xub2~z;v??OcF~ljibO=pxf2l7qlbPL>`Tqm(?e{S`V zf_M?@m&(MhLT>iBm}`LyZu-i6ve=5vJMVN0nt=`i;Y)>E^8g0ltDjh+H6F>(fV}1L z#Nmq{T%}>XHw$s%_}NrQ^X9a}O!`PUr}!)4_o1k%Y=CjAM>tzj>;tCl+>dzYj&A-0 zds|@chd1>74=%;Jq=%>Q?r=4O^8M!hDSYutiy-db9l5Z)7V1pgm6jktx}mN+A`v=) zKap8YelpttuZ}cMPHa9HtFFJdMVOT@$b!~7c-dKu!blrX6biFD%WxPqJtJO;7i5Ml z7a2~**GPqZwiHu|-n++8EUmb@U5gA>zHy0Oc#k4$gtuwFkR5AwrBUMYN3`cW_uWgE zd?Uxw9UK7QFsU}9%?QS-Z{eq(D`wjt;$$8};azgE;8A=)Km<+@?)JXi492s<%|=Wf zKae*6@rOtEdPp<&){6%Ew3^=sJOB=Dc_+ktDIdx(4&Tal;E6t>+;Ss2HmTwH$bOW& z{DWJuF(C-jYE>3|6PomDB~sX(OnECSzdYzlMC~z)V`cfA=2lyrxi~6eZ{XkVUqD(f zqOud_@wDfvEDzf9C}e2%>?emQQsR`FJ$X9xElibKd2ujnRj?jF|76?>ft&AD=7&gF z!ib@;?@v7X_Jx`1^+Spc(3sC|d}DDp3B=rIz3b{gEg@`2o?C&@IQKcx&vGj{eS9DK z@baH;#mnSiT!tnL^2aiY4{0OIt1TlOJie7~=CsU}!?qS}#DN@wmfV@uOnW(n>xf*4 zRy&`omFNL^BAzEAp_iANiLSDzzd=831*E<$_V<|ijcY%JQs5maKQ*ZPJ`t^|wThIs8Kxlh6^`mY{yae@hyT4%$+K-DW+hz)HNer1(uw7t6&<81s$cvoMV zmxbpdMO^NaAYk0U|H2=;@*oiF`8bqy6Aw{t$qy)yy+T*}&z25F8hIMwi+63m%HEXM z%u{605vI?s-NJIb+OcaWSFxB($g7ThkCR^jjy~s~8n~NR6oVStRS@a&tvjx$tsnu&v=F#lNjCT-i zQ@P#FVjA9KFQSh=&g9x;@};)fiv9vKuRSp>j}&8e2e^wAG9BGxKhEntA@{1%y=efq zb~Rfk+0T%tw2(XhQKA`Kjq$dkWjMF_nN0TyFZ;*81!()hw)>~lPX7XzhS8H1=24jR z<5t^rvN`3MG?h7O+h8s4dVgM_KZRA`-mF`HfO1y z3z8=cJ&1VN%eBZMO_%MV#&r8B36aL+rZS3g--x>q&FScOJGKb5UhfhO9Q^G}2KnG@ z-x`XQAZ$+Z9+c5A;LROpXs@c{?#BfU>7V z55ZTujwgmoG&Vms>?~{1)0^IYQ&uPpYgt;5%qu^CEccCDO+>%$B z`jnhYr+vKjR$Yf%uYj|emit)Ct{tPJ=ns5lxlV+<;zEqiz2N&p*eTq43l-iUppEqX ze*pof1u|;=Dg!vGK9$;Ht;Ac*OO}s3T>W2EY!EYOrOtOe3Cpw=ORJr0@<y_+qgnzKu*wq4?jB{p#}Tz9VLf3n@1oml@WQ`%w>pj`I*-K!q)Thmt-PQcN;rkySNbLGGU#WzcIZMXzqfTzI=scrn;eQ>KpT63|C&h!kpw?7ac`!*02 zyS$O-TT+ziQ;%tO-9y8=|KnnDG_@H|XSw%XDL*WZKIL8zAidHU5t}@=4Rh(pd>M6Q zlbA3}>6~ao*XAM+kNki_A<;&bP?o4q`kof6sYuQ5EX8zJ{e?793jb=rCx+&3i4RMb z;zo=(%>vrOqOmYcP&eu=gkg44v9>sxBmHhuvb&_ewD9riZ7XxmAN=Z37a#o}`NkZ8 z$YM(|3Bx_B7f*=8%#&|7+>(SC5=(AT9U~!d$i{GA1A+qmkSo;gz-U}lh>pTs>7i@f zSfz}e6gI|z4kpEyU}DKd2=dl;S`!vrGvt2Ku&qu%3y2;ntXg|%rp&M z73eCTWWFhSI9M0M*(rD7p=`N|;hSV9AHxILr|`fU3|fZM9ZeW?)0}K%*x}~KLM+s0 zD3?B!xL*6A#0%q5E_CP~{m>!-3JZk31jsd2Uh}l#_+pEq4MIDYM?6Mlq5|_YdAi>T z?h4}z_y_AKY*7}b5XWeg1Im$giOIX*L>ZeYp^K4P4=~f4UHO-P`(t?jal1#h7pZ!* z83P4lhWsc0t){@cY$yQI`0Nz)csQ;fC;@o~C^PZ)1sS7IFxQznLs|)BNUY2YowiB5 zNq)A)6Beg`eJBw0zM#Y~BnZAEF1^CSU>0MHw@!?f8EWue#<)3zA7w7}ojsw{AEB6+ zKn9PJ4ulE2HuDHuvaj01k5cA3qWp#|$29tf+``p(9Y3>@huwj%fzE84t53}7=8o$J zewe{zj*Hw?Ce7>(;6bM`a-Jm6`t>&Btq%3h!9_{dfLzGrhyDSpz%^{ixa z;1iZ>N9I$BHx#@$_#l1dW~`MwmkhmeAFrT)1wi=l--Dz88ReHvxReX?k8&<>4_s(F z?pv{KTfQ1lE5+2#SYARV!nL)nKD@!<%*14_e5Nx>{Mej3X_(S~vXoixtF~Zg<7Zoi z3wgr*Uz>`qL7*D2S^+8e&``pMYl>q#x{pK|>PYez4q0eX48{5Yuu8$iTCQ_~=MuLN zmpQps!EvYhRYi)$qp2Q+cQ2G=;k@2R%E0Joa7 zS>tlkw6WBul8tUW!(IcJ^K_2UTkhT#)6$S$5Ch({jkKMM7A|yzDMZW>s-0e4Y*c>e zwrl9~L>lF_ikfXg*t1b0m(35A2Is3G;w>huPxed^LXQA+t`@bsxhzf{t}+u(w=bSy zn9_}_b~?>jSICNQVWnux)vT&4bJ*&p*Gw6qrF)T-65?99@L6&(Ub1jS=iz>pISPN* zI5oIkh?(lZ@Rb9Q_0Ch!rZ_Q7SOv1(CV2*OjXn=`n+Mb(^OV=OSt2(A*Wc=!`=^Kd zTm5S`Jm-eiGq2Dgo2j%~axuV1#I*nQX=)pJvI$GN+WE5m*Esq-i}%-vWk0_k9JkW3 zm%#?F<&YOMvbWldY{jZOru(74WG!91yEwCxOr2M9v&C%)^mL0*0V1zRHS?mHz4=H_@FC8&=_TrEWa#j|K{wEb3eZXbmPn9BkMs><-fNC2e4Xh}HnqP4ZE zcz__xKI-?`QlrJ7u%SPE=SkL_8;l(A0~N-Fluz<0d_pS->y+(}!cEQ5+ZTTKq0Q6# zg`TWu+=+crRiTfln1(EPR)IO4h<+01Il;fUn0GZjIxo5WtcpuKnuBlA{?rPlBbIS* z>4prXfx_#L*be(LgDt@DIgp_Ahe(gn0RoovPL)0~aC=mN@)>Zxk;zG7o>)~PevF}~ 
zIHGOvii?Tt_Mwks5JB%x8W`ijX4W(rDn|-G_n4<4KZ+g4-nRT9&ms|J`@&)-`bju6 zbOB1lUDEw!7}}S)`E)s)Il61F81>SpvDt|6 zkuKi%6=j^klZL;v6rlzyfSpGj0DL3vNcRGj z(}-tyke7;+QnQdI-_Jydu>L3cngZv%PO|d#-@?&u(_5wB9_wqdXVQw*n|9ANU>*YwKpt%P#%KB3q-mt66o&k2NXe)Gd^(V&(g z!KFqludGViWJ9VvH7f?2nC$R{oB9To`BnBD(YFrhJs?kB*RjN9-M{I7418TLekpPQ zmN;mB$t%h2K0O_I;Fy}ayI3~lR>!3_d*WS3WB=i`IgGCw>XW(9{}u1B7U(lkVv8Rl z+DcI8e2eAFRA6rgUzE=0Kn~)$0#3MhyRLGW#S`fm%soj(~r^x`Sd2`y@V3 z$a>kHVUY zGlxAx3~>op{wThtCH&yZ=U-aBO0w5;v5WaBDf ze=Ua=;SDvus>1MhbcFOYh<(J58fHH{)yjQ1P(QjDSjyQfIkmgWy&eE1D0>pcPHcOF ztj9<$!a#whKfQY)gMFUFmwprbWI2ZU^HfPJg2}?uiRCfFuPHx8Q?< z`H$pS+5U!DD%{r(k~4CN=%v9f)3qr{4=|)=-d*E;!z!ZRo`e@{R4Q}^_T1aS<)6c@ z^L8}*j5)t1o{Bjv`@y8#+Xr@HX;VcH#vB%YZwhC#SI=$mY+ZdKoFF#RQ# z^a(zKC7Wtn=P>SNoZ zoUHFVm>hj)>8rRng7QgxmSA!{KnJ(J7@u38e1gaf?@+%!RL%;PaCisJ7Y``z{z62h z7cj(u6=dR9Sil57f^7N^9z8e<7+{dYXzMog>q{hfOVOPedgm3gUAb!-!5Dl; zwPBd$*)$d4BfEA7@XbW?w}1iW5+689roY6NuAibe271iOy@5K{R(`f}-x(j)KxP-c zWbbr^@!svPVbo?VV0gDoYV(D!NA`Aa8}uNF=59!uZhgCJr42XmwaCtbgn$|~yFZn^^7EL7r(pCx>ZpHN z4L$#=GrhIpQMJ?T&XR3+YS05a(O+4@STkQw-_Vbc-7@bz>h6<6oh9rwVy!I9y6S5f z{ARG%WyKt#0tjL--dVz4^KQp|8Tv#-MFW@T62|+zXw!E(`~6FAD223y^}bE5nt14! zFyPySw(r0E_b)8KDs-)uK99nX%`be*+4|PU{C#(|9#TCq8MkCRergW~09%mFOHW4d z!AGpfHo>%{EQ3MtDflmZlQ;5!50-n|CI(MSm~x+6$8JC*SPD~;Mi>?tbRhU9UdKz= z@zhku$vLfM=XZNLAHI$z?}qsjZ=VUd$f2KK`3wp;pgbqW#NS@|y8s=oWxu{QxCK|Z z2DZVq$uQ%YdotlDw@&iyXBjE1wsK|7}phc4XcrAIKGMgbVXJy*u$8qMLt( zTY-CpYwik~XjWFZ7$|zh*du!Mq#F!6@f@#kLmsCK zbQRQQS2pq+M?HZRE&|V8C4KUtXI>^ufeJbTGo(8cZxThs4TAWC*jh)M=x@HLE3IE^Tz-_F{ph+%{TP|a z4?tGvBH&@iNX`KRYsxUuYP$Twv&CWu7xpqag}sI~@|?46eG;H9W1ayS+=g3^&Q;ln zL(9jgStmT&M69I@eZdWUK)OLH-^Mi2p08n&xDs&WnRwHGpF$+JvsE`4ZSpZ>Ka|cO zzM~Wu55EC9Nt%*gAT76l3~QL9Kw+&1K283VWXesJ!$Afa%3dgL4R1&?=QV}b4fp3N5yA@;uEWah zF%GXs?ELy;{=f1Nx$yU+EZ$vi4jZ4Ad)L1(RhO%B2QVaj(Mbh_>&pWF25UsKVD`CQCj6cMY2)brm?Z zz>^|Za^;BXu3}3YrgFu5a;c1z8WqQJcEWGnfN%`|QmS(D)cQ`v?8iUV*5Z>2J65ia z-DPKp<(d=yE-j%LOp1K8o%r1IhVqe}vha)}a?NkRG`jvtACjHZeee7bf-MK&$=bph>MeQX+Km3``Id{rg1KfT4-Uk_ainp+#*>BUnL~If0kQ_^oV?@l+hjtIW z3mpqb=V`ChpN8H5U=aR&PNH{?5E+kSFOw2i#fA0kPM@X~!*oA_%j=cRjVrLP=`Q?M z_gfF*3(Dnn9(-!KpUxi_;j=^?w52bt@ZWc}g;SZiKGtWo5{UutdVu_{nyYB>247{J zxWn!1o=l$iEzVM8?7yo(nj5_rjq4w==+5GzJE!M`^4KcsPI1Nk)D}m2<9Rrjw!;7wWgn~6aFF3wwxTYA0>;;bMLE*KZ z!|PoI@;ksWmm?q3^QZ?^-2#eM+C};XU{6JT06VZ!V~XnVl`gY$nG->qQQ=kMTWQ5=wd~s6h1W@2WON;z#MDOYkjyrG z9g6MAgWn28t-a`04@th=;RQwM5hBuZ3*P8W$NV0@<|t58Y+kTQK z6%Kv5x1=s;M=D7S-F$OW_%&%y zaw2bEqEL3W%-O|sMpEWw=pj~#VdTYPwiLk^(k!o=@H$z)tyanV>DTXdyZc9B<#fxk z=o$me4s$2Q_H*B*w+WA^+Z*w>XChJ1UHXkqPeqm=znAWlR2(z z>dsY}$_`y@+33yEvPk*m8$VIf)E&`>_%s81iA=Q1$>!c22 zPi=F$W+s=7w(dG{C)b)R51n_RU^(y#p@)l8*Az-87whVgpozPVKt4q4cd>X8ss6kb z5(XO2jn8s^yc`1UL1$&@qJfcD>-@|)OBGHLywxOSKi@D20`!i6f*K}WRF zzN?TPY);=Q+ZIyzKlROm-kfL+`VD6TzYYGVa$n0`M$#d<2`+-0*K_RUa==}1$%7y( zpIR8%?0c790gO3S2zfgDdA8#n>AUO(Sdp_Sq@o>AG|?a4GSshjq8BZh-J9fF@*~;& zPZM7h_(TAfJO#qBU#J-R*{*>d{O%kmP86T>*ta^9&r|f#7YQZ=04dj?P*WuI!Lo8M zGj+zbaXt}4jlYF+!E@S$8nON^oJY*uFIor*avS~a1vm}&Qo?UQ(C2a>%v~4dp`)V_ zIgE6(F!%LFPJ&j+L8|l*Dp=nD<5ML!37{WfuaPgzsPLA*Tg;vfD0oX!!<0qf!N4e^ zsY{3Wl|hFH9VwKo|I(siKvJV$Yn)T$LRMx4w=nhui>_0mrW=71<92@0c?R@3+q(NZQY&Rrq7So7cr5hJG9k3=iN-9kPsY$$$UK^mc1SIf zdks9tI@w>bmVsSKJ{to%Hi-pU<6l~|7PQh*=sSZ`Gsv@&t0m>Bg}D4K`Nqs@ilY$U zR3R;6)6AVkR;Ab*hhB_HLBTr zCbDBC=7Mw=o0Bm7%yl7#6S1vAZU}dqnfjWE=PUNrk$hn=EPW`1fq`kclDI4ej2}5A zv$u7x)nw@DgRP^77x|E)i?;H}Z_L5lb-lAAtMWk!Acx;!{Yx|qgii3EZGKM9=mizp zc$5JxcYSF7>k}0Z2+`q7Q+xJk=_#wg_`uT@<{i*24_qvSdsK$Z-L;SpV=%ovAs1X? 
z0nUK)G#XNL*Rl=#T>S>V zfrE&aISl+_Yz8xuXz#A1qPt9(D=_k%{>me~wZ!%7bD-G|Qep5nXUJau>?Np^SE8?n zdx>kFS0(b~ip$bhv!I0urB!rLh*9q-MHKo~{s@n|;s()w^d<>)eG&oY^mXZUS6qeB0bUx6*!l^L0|vy&43vR~R)x1blhv3v!t zk*^#a&b(CZ4*nK@F@qd}*qtkw`Mxr8;f`&A{uy$!AK9RI^%rnt(%Xw-u9Xc`l zJxa-p?4`?gM&NuNsR6V@f$^Lh9lxq)0Y8PDw4?V{%t^{Qk2!7R@v!r067rjVIXN3Q z@}X9typ+;sI+PU~qiok=?#Ew5E|B4K5-6fM7jT5LaglqAhhLR5T|C8^w^0th9}1IC zGF`zaeGBAR4EPUon_1C9jncCOime7CtxsxyO$&0l6u8m4r)|SkFDaNArD_eRcADAT z7ODS~h|mRwtFa0-)`^^H8nZje zuJEW{CLtpSrDy>J6O$-o7Aj=WxF4O&L(vFWqOq#xxTR{FGBHcSSBJDqvg;=v$1p?8A^BXz6VHfIQFWrmZSnzLP-i>daSAV;kltQP84pq8TT=1GM&84| z&r~A=?j%KYTrZYcI0C*9J}S~rGTLx6_A_+;7kB-q0-xS|+%xZUmwcxscn3 z_jcDeU9~Tmz}Mf!f8p=tt0~^C{pOotMmg4j|AA_UmtUD$~)lA7+%a^!u} zNqGcIhnFAOINJ$(%AvHaL@yxnsv%EF4=nv+0Bm8);H(hZc zIt?B~!ij18jYKUkdsUr+ty%;HfV5mVv;)Y z%A3&H3@8!(37W6J7AcIH`S6PTR8_dqChwE6PN{iEBSO@8DIuB`*#L zkMXu%?jwlFEvterr1NYWFi z4MqZoJF!{tH7)}5H2$>AJ#{9h@%aBpIHicGo_DD19E`!|_SY}5BBGr@&KY-<-{Y+$ z=Z90%GTapAMr8Plyzd&UpNZsV$A5LhDB-K+%P{=y`0JmWjCYPqqoa>B62;z0zA-ZN z>?)Hoo0d$pqW?76SB`D1kfWYOcH?};sQ{)$K#_i2RM833=D0W!@ff*;=D;&C@X|vN zMo*++X-@O|>%MaGoOvY|*on_y#HEa&wxcsXV z_5s}laTNvy(Ebp(h!@S(7R!`VBZ-hYu|H#DZ5iQ@0YM zKkK}FAIm`0NzFXvzLcQQ_By|vDREqv$tAo*7AksMm()5v?ft~acXZtd~^|g~* z-FVgifeTb3c~E3&*Y`mCq6XTL->1#=ZZq=eroQkNwsUl6TqpGgBtPvzx{RFI!l27C zo*({@$SW8ZoptbIF8P^CMhzG@wmqv8UZYT#+e$*aMZ_?q z<}a=5_mjmJY0K1ZM^Ay;nCd8aInm#J3s4pGh$y^=x-@bEWpyqqlTnn71P++qrm4Va z1Gf|V)+17LRBGO>e9dY%G>PQ6T8c?I?e^jMg`Rnw8-FG}>Nfe=?vJ@|5PLndpkCFUl}8q`Gm!J35aX%r$A2Fl zT4;{Eg$`LMj>?Pt_<|yYvun!C&&w5yCy}3C`m~zF${SfQo={kzgpoO!eIfG1>m+Y0 z1e_A3R^%7C366WYX+L%}^-~mV(Z(?;dB{@;_vSddVK7LMiO^WW2Lxw6J1X<$56EKZ z@NrV6GK?B5!2`N;Le)Jgx@4E>+dq7hQY2AHxbN6GmC_v+5nVG=?Nq4^FabaYUj5%l zaMQSkEr#G|Gs5))MlP497oqc2TPwMz`j3*&#eN5w!uL|51&TPAYd^jEQ{)bf{N$Y6 zfFlS&lx$t>XwMfbZhxDP>naBE7t~tnB2Dm52|;V+lD^$f(EyB!d%51w5RPPC{lb zz8&8c-~Hc@-kv<6kah|(v(p}Y>zNhg;(1i^&7;X`U@@n-LdQ7=&H(m*YEYaOh%;r=$2EMJc|7` zc;g^o3nLo-9c>}pP05~AZYE(bww%^Hm2`_81Xyz9#Yu8ml~)spO0w{`Q+~NiZaT@U z(vneI4#F$IOQ(F^E$ndc08LemrLgW~!82=f&{tzF8=1RlKmB$S-=krpA!kKoERAHl zx868swT%?7wFJ?Y9qyAm->~RY-R>>yE#{W(#cX>-Y#C;34Rv0`wP3FtoZ?37AC4?9 z$0mSTiZcKrqJB8MXGl$j?%ha{VGFz4@k8NESIow9p5**&nc~}K*>}0eOS|K?JiOpi za&}-!GJFfG8ys=EnN{XGea*;())%A)%1P#a3nO~IRv^LEo>%Mr=-{U0P}8s0-`WC0 zJPWl~G2$Gh0E3=%XKtK|Ss>=D zO{+uh_6?tPphig-?jqTw8rDjzlzG4O)N@v3L#bEj94 zUS=(ZiLWZwN&P++Ecz-7JZ||up1k^1ehYe>k%^;MS*Mx#8NfW9Lv?Pp&c{# zqv0{LNtoC;;zP5W)lKU`#=On;&U%IJYt4QtM_NXQ}jy2Iy%ZvbI-$GFGeVjZ*V1-5o)cGqldmGM>4 zTXM)sx=6q;1!gaCyD!9u`9&3713V3G*LU9^$C_o!^Pqos7e1%IjBc8`K8t zlkB~W`(R+*eEPHWL`RBz)RCm%9I+*Fr!aRsiVyH1r8&kj%J)?hdqNekkI1L^l>2zVz>~J6$36d|-BTxM zHxP#wj?gk^I|N>JUu5}{a8ShdrdLIaF8Q#tM^}u8OTOv>%U@&Y*q><&h%m$v53_nc zvtQD&POjL&m*imO8QLRsY7|}Qn%Am9M18o z=Z&sc6vg~yj!(ZQ-`*n%T8v@*f?vG!oSlX*Mhs38n(Jvp4&CA0_!bh_mCeOLp<~tx zVC;%V$8zsHwh{1Z>B;T{VBV`Nloex=TZI<8z71<5^64fl##H+cwG$&;4wt%A6Y+i8%Zq#UOvQU){$n`J!QFTDHOFM)>+Q1IOEf(roC5B~W4JUfm#YjL8ll7^y-?iP z`5TKzk$ol4w(7{zMvvZn6<_fv9}7O=QUn+dSrqQtb8wOzeQ^&*=<+zYk$GBID{ZN9 z54h?0+#JXRn8v^}h!k{-6l4g+M&?nfw?mif&?a4V73K=$#O&(rT5+J{=o?yB}Ir)Y+5FyAM5F=x*MGzwMps=yau8Lie`g8TsJOfEqkK3wzjp z1X=G!9X^xj7vf3O-D{D>8c8z!qA6Sp-e$sBvB&k5ZHY#rOF3rL@wf+@>tM(c&%+Iu z_7m;V2#1(e`zsH^eV2GI#>3O9X9sb{?{E@*-60k*&3vzv{6@Uil8%La=Nq9Rr((Ce ze&x$^Zzhk};mZnF=f!Ul&yL5q!(lJl*4!-|a9_j6wtmJY(eoNqVn-?hz{94Ie2hSS18Hs3FY5+JAln5XDSqbyq zNs@w{c$QoS@+a)KJlR*~SQa9TU?tZ&pS7)$`b-X5&0Smlp6OBVvi1XHkCnjCG3u2z z3`4~R_3uF*mGW_f?fR_B=KwIAoNkDmXS8;o>r3D1jh)}xRmuK0llPhI#wY<=K1^mJ zcd9UwgEvYK`ozP$6!z|z_2vtD=I|7LL+Q(Q$ZND#3)O&Obqtv(N)JQXdG=(niZV*o zoTr}pYz<0rS2D?c!?ltP*R!ex5Q(Ni{Q8wASt8LBnl%m%46o;2=k}=QScU2*jA(BA 
z!_P)3Stsxdzw=cEj|JQ4)pD-nJMUQ$hw8{J- z_-(QQ}^#!TlK7DnE>|U z`aG_~Uzdx+P361AzpxHSlO0iwyn~xLt?5ltAJ;HyQ8vlRHBF~Rz2pWTZX3Wf9fG@+ zK9S(xJ!^UMm@;o}k$?_^%`9Om-Wqj#McBD+UGY%B_I@ifqyrn0A(a}MNu@T&A`MPrf5E- zp$}7*ZML|VJ-UuT-w4%D-;Un?vn8gaWZ0^u41XT~wku5CB~OIwSEFZjD$3R&!ND_E z*^XE`@Vl})$@rx4yJf?kD5rJ8ePjiq?MdJHPE20V?LCqzU(xl(6HORhzTA_Yh(wCq&ERz{0`-i zEXwCJelu(_B5PzpQ&I7jyrUt^6;cXY3hzWU;YPZ3_Ww1@gWKrp@eYrklu|?-uXY|X zA9X5l!cN&#AZ<1K{gq5ZEkLIyJ=7ATySbmI%nn~S$@4DfzIR1IcG+iAzI^x2!*vw0 zbH`TBO^?^M@fk26MnEexawdzS%xC-eQ5d-8=F?zaN{n{Lcs-_vmao+K0)_i!sGn`s6>xTe=D7g zM=?1H+sNHlkAk6bZWw{8`oJt-B<(s1e$<@olEkr29qt?wW`RhY@&TfF?tL)<; zW()BgJ*uNSto4v!Zc4)wpZmiCW^ZCY|D&R}Fff23+mldlo-Z~7Z^k1Wn*@VBD*Jkj z=Yx9?wc+4N0Zru&2a5uz{q{So@%Py)b=vGr>~VY4IFD+a_ce;q%cDlW)7!$OWzT<{ zy={gbXcD{~ZRS73!q20c^}Qds?vS74(4xe4Dx03e z4rHU?%{*${@JSJ-j?CT$Uuqj(N=T{TU4-cLQTi>}xnuAt`i~u72)cYQUM<)OBpF^i zM)3FpI%W^e)y{W>2@eu~3t)!;Uk7Pk4?Q?1TNGcy`N9{h!;1)XMtFn)9iF%k?kuo_ zeLk5v-4pRcJYV^tyrEf!{`AAv;k^Cm9bpA8`>1an(bqe`&?oo`kNP(Bbuc$AYkM{l zZx^?b^DU-~T&3$|PS@6e@LOVX^9vuzk1FYJGgk2b0Z>Z;0v8Ju000080IopiK0{>y z#G<1B069wo04D$u000000096X0Jebu0001NVPtuBVQp}7Wpi^aWMyVyb!>DfGB7bQ zEigANFgGbJPg5>*cyv=l1OTi60031~1po&Kp%qS0O9ci1000010097I0001FqyPW_ E0J|a8F8}}l diff --git a/testing/btest/Baseline/scripts.base.protocols.irc.dcc-extract/irc.log b/testing/btest/Baseline/scripts.base.protocols.irc.dcc-extract/irc.log deleted file mode 100644 index 28ca448e05..0000000000 --- a/testing/btest/Baseline/scripts.base.protocols.irc.dcc-extract/irc.log +++ /dev/null @@ -1,13 +0,0 @@ -#separator \x09 -#set_separator , -#empty_field (empty) -#unset_field - -#path irc -#open 2013-06-07-19-08-42 -#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p nick user command value addl dcc_file_name dcc_file_size dcc_mime_type extraction_file -#types time string addr port addr port string string string string string string count string string -1311189164.119437 UWkUyAuUGXf 192.168.1.77 57640 66.198.80.67 6667 - - NICK bloed - - - - - -1311189164.119437 UWkUyAuUGXf 192.168.1.77 57640 66.198.80.67 6667 bloed - USER sdkfje sdkfje Montreal.QC.CA.Undernet.org dkdkrwq - - - - -1311189174.474127 UWkUyAuUGXf 192.168.1.77 57640 66.198.80.67 6667 bloed sdkfje JOIN #easymovies (empty) - - - - -1311189316.326025 UWkUyAuUGXf 192.168.1.77 57640 66.198.80.67 6667 bloed sdkfje DCC #easymovies (empty) ladyvampress-default(2011-07-07)-OS.zip 42208 application/zip irc-dcc-item-A3OSdqG9zvk.dat -#close 2013-06-07-19-08-42 diff --git a/testing/btest/Baseline/scripts.base.protocols.smtp.basic/smtp.log b/testing/btest/Baseline/scripts.base.protocols.smtp.basic/smtp.log index ba16578dfb..b56b8afab6 100644 --- a/testing/btest/Baseline/scripts.base.protocols.smtp.basic/smtp.log +++ b/testing/btest/Baseline/scripts.base.protocols.smtp.basic/smtp.log @@ -3,8 +3,8 @@ #empty_field (empty) #unset_field - #path smtp -#open 2009-10-05-06-06-12 -#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth helo mailfrom rcptto date from to reply_to msg_id in_reply_to subject x_originating_ip first_received second_received last_reply path user_agent -#types time string addr port addr port count string string table[string] string string table[string] string string string string addr string string string vector[addr] string -1254722768.219663 arKYeMETxOg 10.10.1.4 1470 74.53.140.153 25 1 GP Mon, 5 Oct 2009 11:36:07 +0530 "Gurpartap Singh" - <000301ca4581$ef9e57f0$cedb07d0$@in> - SMTP - - - 250 OK id=1Mugho-0003Dg-Un 74.53.140.153,10.10.1.4 Microsoft Office Outlook 12.0 -#close 2009-10-05-06-06-16 +#open 
2013-07-25-19-52-35 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth helo mailfrom rcptto date from to reply_to msg_id in_reply_to subject x_originating_ip first_received second_received last_reply path user_agent fuids +#types time string addr port addr port count string string table[string] string string table[string] string string string string addr string string string vector[addr] string vector[string] +1254722768.219663 arKYeMETxOg 10.10.1.4 1470 74.53.140.153 25 1 GP Mon, 5 Oct 2009 11:36:07 +0530 "Gurpartap Singh" - <000301ca4581$ef9e57f0$cedb07d0$@in> - SMTP - - - 250 OK id=1Mugho-0003Dg-Un 74.53.140.153,10.10.1.4 Microsoft Office Outlook 12.0 A1IqG95k9Tk,VUcocHqaWva,JJPHrvZaGJj +#close 2013-07-25-19-52-35 diff --git a/testing/btest/Baseline/scripts.base.protocols.smtp.mime-extract/extractions b/testing/btest/Baseline/scripts.base.protocols.smtp.mime-extract/extractions deleted file mode 100644 index 45d776a8e9..0000000000 --- a/testing/btest/Baseline/scripts.base.protocols.smtp.mime-extract/extractions +++ /dev/null @@ -1,277 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - k6, k6-2, k6-3, athlon, athlon-tbird, athlon-4, athlon-xp, athlon-mp, winchip-c6, winchip2, k8, c3 and c3-2) - "windows.h", he gets all the WinAPI! If he adds "wx/wx.h", he gets all of - #included directly or indirectly)! - (available when right-clicking the class-browser - (still can be overriden by using "-c" command line parameter). - (the same filename as the project's but with extension ".layout"). If you - - Enable use of processor specific built-in functions (mmmx, sse, sse2, pni, 3dnow) - - Generate instructions for a specific machine (i386, i486, i586, i686, pentium, pentium-mmx, pentiumpro, pentium2, pentium3, pentium4, 20 - - Strip executable - -c - 20 - Instead open the file in an already launched Dev-C++. - It used to be a checkbox, allowing only two states (on or off), but there is - The user can define this in the class browser's context menu under "View mode". - Well, it adds caching to code-completion. Depending on the cache size, - a third relevant option now: "Project classes" so it didn't fit the purpose... - and selecting "View mode"). - cause of many errors (although it should be fixed by now), we are giving the - class inheritance and visibility (shows items only from files - code-completion and the user has all the commands (belonging to the files - compiler: -D__DEBUG__ - displayed in the editor when the mouse moves over a word. Since this was the - have your project under CVS control, you ''ll know why this had to happen... - he added in the cache) at his fingertips. If, for example, the user adds - include files can now be generated. - information definitions - it in the private resource) - its counterpart are highlighted - resource) - the program may take a bit longer to start-up, but provides very fast - the requested CVS action - then we even get a stack trace in the bug report! - user the option to disable this feature. - wxWindows! You get the picture... 
-* "Build priority" per-unit -* "Compile as C++" per-unit -* "Default" button in Compiler Options is back -* "Include file in compilation process" per-unit -* "Include file in linking process" per-unit -* Added "Add Library" button in Project Options -* Added "Classes" toolbar -* Added "External programs" in Tools/Environment Options (for units "Open with") -* Added "Files" tab in CVS form to allow selection of more than one file for -* Added "Open with" in project units context menu -* Added "Tip of the day" system. -* Added *working* function arguments hint -* Added CVS "login" and "logout" commands -* Added CVS commands "Add" and "Remove" -* Added ExceptionsAnalyzer. If the devcpp.map file is in the devcpp.exe directory -* Added bracket highlighting. When the caret is on a bracket, that bracket and -* Added configuration option for "Templates Directory" in "Environment Options" -* Added display of project filename, project output and a summary of the project files in Project Options General tab. -* Added doxygen-style comments in NewClass, NewMemberFunction and NewMemberVariable wizards -* Added file's date/time stamp in File/Properties window -* Added new WebUpdate module (inactive temporarily). -* Added new code for code-completion caching of files (disabled - work in progress). -* Added new compiler/linker options: 20 -* Added new file menu entry: Save Project As -* Added new option in class-browser: Use colors -* Added possibility to include in a Template the Project's directories (include, libs and ressources) -* Added support for GCC > 3.2 -* Added support for macros in the "default source code" (Tools/Editor Options/Code) -* Added support for the "interface" keyword -* Added support for the '::' member access operator in code-completion -* Added the possibility to modify the value of a variable during debugging (right click on a watch variable and select "Modify value") -* Added the possibility to specify an include directory for the code completion cache to be created at Dev-C++ first startup -* Added two new macros: and -* Allow customizing of per-unit compile command in projects -* Allow user to specify an alternate configuration file in Environment Options 20 -* Backtrace in debugging -* Big speed up in function parameters listing while editing -* Bug fixes -* Bug fixes -* Bug fixes -* Bug fixes -* Bug fixes -* Bug fixes -* Bug fixes -* Bug fixes -* Bug fixes -* Bug fixes -* Bug fixes -* Bug fixes -* Bug fixes -* Bug fixes -* Bug fixes -* Bug fixes -* Bug-fix for double quotes in devcpp.cfg file read by vUpdate -* CPU Window (still in development) -* CVS support -* Caching of result set of code-completion for speed-up. -* Changed position of compiler/linker parameters in Project Options. -* Changed tint of Class browser pictures colors to match the New Look style -* Class-parser speed-up (50% to 85% improvement timed!!!) -* Code-completion updates -* Compiler set per-project -* Compiler settings per-project -* Compiling progress window -* Current windows listing in Window menu -* Debug variable browser -* Debug variables are now resent during next debug session -* Dev-C++ now traps access violation of your programs (and of itself too ;) -* During Dev-C++ First Time COnfiguration window, users can now choose between using or not class browser and code completion features. 
-* Each project creates a _private.h file containing version -* Editor colors are initialized properly on Dev-C++ first-run -* Environment options : "Show progress window" and "Auto-close progress window" -* Error messages parsing improved -* Fixed many class browser bugs, including some that had to do with class folders. -* Fixed pre-compilation dependency checks to work correctly -* Fixed the "compiler-dirs-with-spaces" bug that crept-in in 4.9.7.0 -* Fixed the dreaded "Clock skew detected" compiler warning! -* Folders in Project and Class Browser -* Implemented "compiler sets" infrastructure to switch between different compilers easily (e.g. gcc-2.95 and gcc-3.2) -* Implemented new compiler settings framework -* Implemented search in help files for the word at cursor (context sensitive help) -* Implemented the "File/Export/Project to HTML" function. -* Improved Indent/Unindent and Remove Comment -* Improved WebUpdate module -* Improved automatic indent -* Improved code completion cache -* Improved editor -* Improved help file -* Improved installer -* Lots of bug fixes. -* Lots of bugfixes -* MSVC import now creates the folders structure of the original VC project -* Made whole bottom report control floating instead of only debug output. -* Makefile can now be customized. -* Many bug fixes -* Many bug fixes -* Many bug fixes -* Many bug fixes -* Many bug fixes -* Many bug fixes -* Many bug fixes -* Many code-completion updates. Now takes into account context, -* Modified the behaviour of the -c param : 20 -* Multi-select files in project-view (when "double-click to open" is configured in Environment Settings) -* Necessary UI changes in Project Options -* Nested folders in project view -* New "Abort compilation" button -* New WebUpdater module. -* New class browser option: "Show inherited members" -* New code tooltip display -* New debug feature for DLLs: attach to a running process -* New environment options : "watch variable under mouse" and "Report watch errors" -* New feature: compile current file only -* New option "Execution/Parameters" (and "Debug/Parameters"). -* New option in Editor Options (code-completion): Use code-completion cache. -* New option in Editor Options: Show editor hints. User can disable the hints -* New project option: Use custom Makefile. 20 -* New splash screen and association icons -* Now checks for vRoach existance when sending a crash report -* On Dev-C++ first time configuration dialog, a code completion cache of all the standard 20 -* Other bug fixes -* Possibility of changing compilers and tools filename. -* Printing settings are now saved -* Profiling support -* Project manager and debugging window (in Debug tab) can now be trasnformed into floating windows. -* Project version info (creates the relevant VERSIONINFO struct in the private -* Removed "Only show classes from current file" option in class browser settings. -* Resource errors are now reported in the Resource sheet -* Resource files are treated as ordinary files now -* Run to cursor -* Saving of custom syntax parameter group -* Send custom commands to GDB -* Separated C++ compiler options from C compiler options in Makefile (see bug report #654744) -* Separated C++ include dirs from C include dirs in Makefile (see bug report #654744) -* Separated layout info from project file. 
It is now kept in a different file -* Support XP Themes (creates the CommonControls 6.0 manifest file and includes -* Support for latest Mingw compiler system builds -* ToDo list -* Under NT, 2000 and XP, user application data directory will be used to store config files (i.e : C:\Documents and Settings\Username\Local Settings\Application Data) -* Updates in "Project Options/Files" code -* Watched Variables not in correct context are now kept and updated when it is needed -* WebUpdate should now report installation problems from PackMan -* WebUpdate will now backup downloaded DevPaks in Dev-C++\Packages directory, and Dev-C++ executable in devcpp.exe.BACKUP -* When adding debugging symbols on request, remove "-s" option from linker -* When compiling the current file only, no dependency checks are performed -* When compiling with debugging symbols, an extra definition is passed to the -* When creating a DLL, the created static lib respects now the project-defined output directory -* When running a source file in explorer, don't spawn new instance. -* Window list (in Window menu) -* XP Theme support -* added ENTER key for opening file in project browser, DEL to delete from the project. -* back to gcc 2.95.3 -* bug fixes -* bug fixes -* new update/packages checker (vUpdate) -* support for DLL application hosting, for debugging and executing DLLs under Dev-C++. -* ~300% Speed-up in class parser -Find the attachment -GPS -Hello -I send u smtp pcap file -Version 4.9.4.1 (5.0 beta 4.1): -Version 4.9.5.0 (5.0 beta 5): -Version 4.9.5.1 -Version 4.9.5.2 -Version 4.9.5.3 -Version 4.9.5.4 -Version 4.9.5.5 -Version 4.9.6.5 -Version 4.9.6.6 -Version 4.9.6.7 -Version 4.9.6.8 -Version 4.9.6.9 -Version 4.9.7.0 -Version 4.9.7.1 -Version 4.9.7.2 -Version 4.9.7.3 -Version 4.9.7.4 -Version 4.9.7.5 -Version 4.9.7.6 -Version 4.9.7.7 -Version 4.9.7.8 -Version 4.9.7.9 -Version 4.9.8.0 -Version 4.9.8.1 -Version 4.9.8.2 -Version 4.9.8.3 -Version 4.9.8.4 -Version 4.9.8.5 -Version 4.9.8.7 -Version 4.9.8.9 -Version 4.9.9.0 -Version 4.9.9.1 -version 4.9.6.1 -version 4.9.6.2 -version 4.9.6.3 -version 4.9.6.4 diff --git a/testing/btest/Baseline/scripts.base.protocols.smtp.mime-extract/filecount b/testing/btest/Baseline/scripts.base.protocols.smtp.mime-extract/filecount deleted file mode 100644 index 0cfbf08886..0000000000 --- a/testing/btest/Baseline/scripts.base.protocols.smtp.mime-extract/filecount +++ /dev/null @@ -1 +0,0 @@ -2 diff --git a/testing/btest/Baseline/scripts.base.protocols.smtp.mime-extract/smtp_entities.log b/testing/btest/Baseline/scripts.base.protocols.smtp.mime-extract/smtp_entities.log deleted file mode 100644 index 865694e8a2..0000000000 --- a/testing/btest/Baseline/scripts.base.protocols.smtp.mime-extract/smtp_entities.log +++ /dev/null @@ -1,12 +0,0 @@ -#separator \x09 -#set_separator , -#empty_field (empty) -#unset_field - -#path smtp_entities -#open 2013-06-07-19-32-56 -#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth filename content_len mime_type md5 extraction_file excerpt -#types time string addr port addr port count string count string string string string -1254722770.692743 arKYeMETxOg 10.10.1.4 1470 74.53.140.153 25 1 - 79 text/plain - smtp-entity-mR3f2AAKo11.dat (empty) -1254722770.692743 arKYeMETxOg 10.10.1.4 1470 74.53.140.153 25 1 - 1918 text/html - - (empty) -1254722770.692804 arKYeMETxOg 10.10.1.4 1470 74.53.140.153 25 1 NEWS.txt 10823 text/plain - smtp-entity-ZNp0KBSLByc.dat (empty) -#close 2013-06-07-19-32-56 diff --git 
a/testing/btest/Baseline/scripts.policy.frameworks.software.vulnerable/notice.log b/testing/btest/Baseline/scripts.policy.frameworks.software.vulnerable/notice.log index f2cf09cab6..54b04aafae 100644 --- a/testing/btest/Baseline/scripts.policy.frameworks.software.vulnerable/notice.log +++ b/testing/btest/Baseline/scripts.policy.frameworks.software.vulnerable/notice.log @@ -3,9 +3,9 @@ #empty_field (empty) #unset_field - #path notice -#open 2013-04-28-22-36-26 -#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p proto note msg sub src dst p n peer_descr actions suppress_for dropped remote_location.country_code remote_location.region remote_location.city remote_location.latitude remote_location.longitude -#types time string addr port addr port enum enum string string addr addr port count string table[enum] interval bool string string string double double -1367188586.649122 - - - - - - Software::Vulnerable_Version 1.2.3.4 is running Java 1.7.0.15 which is vulnerable. Java 1.7.0.15 1.2.3.4 - - - bro Notice::ACTION_LOG 3600.000000 F - - - - - -1367188586.649122 - - - - - - Software::Vulnerable_Version 1.2.3.5 is running Java 1.6.0.43 which is vulnerable. Java 1.6.0.43 1.2.3.5 - - - bro Notice::ACTION_LOG 3600.000000 F - - - - - -#close 2013-04-28-22-36-26 +#open 2013-07-25-19-54-45 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p fuid file_mime_type file_desc proto note msg sub src dst p n peer_descr actions suppress_for dropped remote_location.country_code remote_location.region remote_location.city remote_location.latitude remote_location.longitude +#types time string addr port addr port string string string enum enum string string addr addr port count string table[enum] interval bool string string string double double +1374782085.726121 - - - - - - - - - Software::Vulnerable_Version 1.2.3.4 is running Java 1.7.0.15 which is vulnerable. Java 1.7.0.15 1.2.3.4 - - - bro Notice::ACTION_LOG 3600.000000 F - - - - - +1374782085.726121 - - - - - - - - - Software::Vulnerable_Version 1.2.3.5 is running Java 1.6.0.43 which is vulnerable. Java 1.6.0.43 1.2.3.5 - - - bro Notice::ACTION_LOG 3600.000000 F - - - - - +#close 2013-07-25-19-54-45 diff --git a/testing/btest/istate/events-ssl.bro b/testing/btest/istate/events-ssl.bro index 249ebc3754..d227417c15 100644 --- a/testing/btest/istate/events-ssl.bro +++ b/testing/btest/istate/events-ssl.bro @@ -41,16 +41,25 @@ redef ssl_ca_certificate = "../ca_cert.pem"; redef ssl_private_key = "../bro.pem"; redef ssl_passphrase = "my-password"; +# Make sure the HTTP connection really gets out. +# (We still miss one final connection event because we shutdown before +# it gets propagated but that's ok.) +redef tcp_close_delay = 0secs; + # File-analysis fields in http.log won't get set on receiver side correctly, # one problem is with the way serialization may send a unique ID in place # of a full value and expect the remote side to associate that unique ID with -# a value it received at an earlier time. So sometimes modifications the sender -# makes to the value aren't seen on the receiver (in this case, the mime_type -# field). -event file_new(f: fa_file) &priority=10 +# a value it received at an earlier time. So sometimes modifications the sender# makes to the value aren't seen on the receiver. +function myfh(c: connection, is_orig: bool): string { - delete f$mime_type; - FileAnalysis::stop(f); + return ""; + } + +event bro_init() + { + # Ignore all http files. 
+ Files::register_protocol(Analyzer::ANALYZER_HTTP, + [$get_file_handle = myfh]); } @TEST-END-FILE diff --git a/testing/btest/istate/events.bro b/testing/btest/istate/events.bro index 21f46cf4b3..1edf14fee7 100644 --- a/testing/btest/istate/events.bro +++ b/testing/btest/istate/events.bro @@ -39,12 +39,17 @@ redef tcp_close_delay = 0secs; # File-analysis fields in http.log won't get set on receiver side correctly, # one problem is with the way serialization may send a unique ID in place # of a full value and expect the remote side to associate that unique ID with -# a value it received at an earlier time. So sometimes modifications the sender# makes to the value aren't seen on the receiver (in this case, the mime_type -# field). -event file_new(f: fa_file) &priority=10 +# a value it received at an earlier time. So sometimes modifications the sender# makes to the value aren't seen on the receiver. +function myfh(c: connection, is_orig: bool): string { - delete f$mime_type; - FileAnalysis::stop(f); + return ""; + } + +event bro_init() + { + # Ignore all http files. + Files::register_protocol(Analyzer::ANALYZER_HTTP, + [$get_file_handle = myfh]); } @TEST-END-FILE diff --git a/testing/btest/scripts/base/frameworks/file-analysis/bifs/remove_action.bro b/testing/btest/scripts/base/frameworks/file-analysis/bifs/remove_action.bro index e31abe5ea3..a3704618bd 100644 --- a/testing/btest/scripts/base/frameworks/file-analysis/bifs/remove_action.bro +++ b/testing/btest/scripts/base/frameworks/file-analysis/bifs/remove_action.bro @@ -13,6 +13,6 @@ event file_new(f: fa_file) &priority=-10 for ( tag in test_file_analyzers ) Files::remove_analyzer(f, tag); local filename = test_get_file_name(f); - Files::remove_analyzer(f, [$tag=Files::ANALYZER_EXTRACT, - $extract_filename=filename]); + Files::remove_analyzer(f, Files::ANALYZER_EXTRACT, + [$extract_filename=filename]); } diff --git a/testing/btest/scripts/base/frameworks/file-analysis/irc.bro b/testing/btest/scripts/base/frameworks/file-analysis/irc.bro index 2b93a59a8f..9fd8e06613 100644 --- a/testing/btest/scripts/base/frameworks/file-analysis/irc.bro +++ b/testing/btest/scripts/base/frameworks/file-analysis/irc.bro @@ -4,7 +4,17 @@ redef test_file_analysis_source = "IRC_DATA"; -redef test_get_file_name = function(f: fa_file): string +global first: bool = T; + +function myfile(f: fa_file): string { - return "thefile"; - }; + if ( first ) + { + first = F; + return "thefile"; + } + else + return ""; + } + +redef test_get_file_name = myfile; diff --git a/testing/btest/scripts/base/frameworks/file-analysis/logging.bro b/testing/btest/scripts/base/frameworks/file-analysis/logging.bro index 9792017962..1d1f5fd721 100644 --- a/testing/btest/scripts/base/frameworks/file-analysis/logging.bro +++ b/testing/btest/scripts/base/frameworks/file-analysis/logging.bro @@ -1,5 +1,5 @@ # @TEST-EXEC: bro -r $TRACES/http/get.trace $SCRIPTS/file-analysis-test.bro %INPUT -# @TEST-EXEC: btest-diff file_analysis.log +# @TEST-EXEC: btest-diff files.log redef test_file_analysis_source = "HTTP"; diff --git a/testing/btest/scripts/base/protocols/ftp/ftp-extract.bro b/testing/btest/scripts/base/protocols/ftp/ftp-extract.bro deleted file mode 100644 index 8cbacdbf6f..0000000000 --- a/testing/btest/scripts/base/protocols/ftp/ftp-extract.bro +++ /dev/null @@ -1,10 +0,0 @@ -# This tests FTP file extraction. 
-# -# @TEST-EXEC: bro -r $TRACES/ftp/ipv4.trace %INPUT -# @TEST-EXEC: btest-diff conn.log -# @TEST-EXEC: btest-diff ftp.log -# @TEST-EXEC: cat ftp-item-*.dat | sort > extractions -# @TEST-EXEC: btest-diff extractions - -redef FTP::logged_commands += {"LIST"}; -redef FTP::extract_file_types=/.*/; diff --git a/testing/btest/scripts/base/protocols/http/http-extract-files.bro b/testing/btest/scripts/base/protocols/http/http-extract-files.bro deleted file mode 100644 index 6156009821..0000000000 --- a/testing/btest/scripts/base/protocols/http/http-extract-files.bro +++ /dev/null @@ -1,6 +0,0 @@ -# @TEST-EXEC: bro -C -r $TRACES/web.trace %INPUT -# @TEST-EXEC: btest-diff http.log -# @TEST-EXEC: mv http-item-*.dat http-item.dat -# @TEST-EXEC: btest-diff http-item.dat - -redef HTTP::extract_file_types += /text\/html/; diff --git a/testing/btest/scripts/base/protocols/http/http-mime-and-md5.bro b/testing/btest/scripts/base/protocols/http/http-mime-and-md5.bro deleted file mode 100644 index b35e491b4d..0000000000 --- a/testing/btest/scripts/base/protocols/http/http-mime-and-md5.bro +++ /dev/null @@ -1,6 +0,0 @@ -# This tests md5 calculation for a specified mime type. - -# @TEST-EXEC: bro -r $TRACES/http/pipelined-requests.trace %INPUT > output -# @TEST-EXEC: btest-diff http.log - -redef HTTP::generate_md5 += /image\/png/; diff --git a/testing/btest/scripts/base/protocols/http/multipart-extract.bro b/testing/btest/scripts/base/protocols/http/multipart-extract.bro index c2789750a3..a919a844b2 100644 --- a/testing/btest/scripts/base/protocols/http/multipart-extract.bro +++ b/testing/btest/scripts/base/protocols/http/multipart-extract.bro @@ -1,5 +1,9 @@ # @TEST-EXEC: bro -C -r $TRACES/http/multipart.trace %INPUT # @TEST-EXEC: btest-diff http.log -# @TEST-EXEC: cat http-item-* | sort > extractions +# @TEST-EXEC: cat extract_files/http-item-* | sort > extractions -redef HTTP::extract_file_types += /.*/; +event file_new(f: fa_file) + { + local fname = fmt("http-item-%s", f$id); + Files::add_analyzer(f, Files::ANALYZER_EXTRACT, [$extract_filename=fname]); + } diff --git a/testing/btest/scripts/base/protocols/irc/dcc-extract.test b/testing/btest/scripts/base/protocols/irc/dcc-extract.test deleted file mode 100644 index cbfc6890da..0000000000 --- a/testing/btest/scripts/base/protocols/irc/dcc-extract.test +++ /dev/null @@ -1,11 +0,0 @@ -# This tests that the contents of a DCC transfer negotiated with IRC can be -# correctly extracted. 
- -# @TEST-EXEC: bro -r $TRACES/irc-dcc-send.trace %INPUT -# @TEST-EXEC: btest-diff irc.log -# @TEST-EXEC: mv irc-dcc-item-*.dat irc-dcc-item.dat -# @TEST-EXEC: btest-diff irc-dcc-item.dat -# @TEST-EXEC: bro -r $TRACES/irc-dcc-send.trace %INPUT IRC::extraction_prefix="test" -# @TEST-EXEC: test -e test-*.dat - -redef IRC::extract_file_types=/.*/; diff --git a/testing/btest/scripts/base/protocols/smtp/mime-extract.test b/testing/btest/scripts/base/protocols/smtp/mime-extract.test deleted file mode 100644 index 0caa5d530c..0000000000 --- a/testing/btest/scripts/base/protocols/smtp/mime-extract.test +++ /dev/null @@ -1,11 +0,0 @@ -# @TEST-EXEC: bro -r $TRACES/smtp.trace %INPUT -# @TEST-EXEC: btest-diff smtp_entities.log -# @TEST-EXEC: cat smtp-entity-*.dat | sort > extractions -# @TEST-EXEC: btest-diff extractions -# @TEST-EXEC: bro -r $TRACES/smtp.trace %INPUT SMTP::extraction_prefix="test" -# @TEST-EXEC: cnt=0 && for f in test-*.dat; do cnt=$((cnt+1)); done && echo $cnt >filecount -# @TEST-EXEC: btest-diff filecount - -@load base/protocols/smtp - -redef SMTP::extract_file_types=/text\/plain/; diff --git a/testing/external/subdir-btest.cfg b/testing/external/subdir-btest.cfg index b631ba2457..31fce50adc 100644 --- a/testing/external/subdir-btest.cfg +++ b/testing/external/subdir-btest.cfg @@ -7,7 +7,7 @@ IgnoreFiles = *.tmp *.swp #* *.trace .gitignore *.skeleton [environment] BROPATH=`bash -c %(testbase)s/../../../build/bro-path-dev`:%(testbase)s/../scripts -BROMAGIC=%(testbase)s/../../../magic +BROMAGIC=%(testbase)s/../../magic/database BRO_SEED_FILE=%(testbase)s/../random.seed TZ=UTC LC_ALL=C diff --git a/testing/scripts/file-analysis-test.bro b/testing/scripts/file-analysis-test.bro index 8fe78b218e..d84fadae5c 100644 --- a/testing/scripts/file-analysis-test.bro +++ b/testing/scripts/file-analysis-test.bro @@ -1,3 +1,7 @@ +@load base/files/extract +@load base/files/hash + +redef FileExtract::prefix = "./"; global test_file_analysis_source: string = "" &redef; From fb029617a4a8695f5ffffa75721ff978eed58d35 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Fri, 26 Jul 2013 16:38:18 -0400 Subject: [PATCH 110/118] Update the last two btest FAF tests. - Small changes were done to the ftp log. 
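The deleted tests above relied on per-protocol redefs such as SMTP::extract_file_types and IRC::extraction_prefix; their replacements drive extraction through the generic file analysis API instead. A minimal sketch of that pattern, using only the calls already shown in these diffs (the "extract-%s" file name is illustrative, not taken from any test):

    @load base/files/extract

    # Attach the extraction analyzer to every file, independent of the
    # carrying protocol (HTTP, SMTP, IRC DCC, FTP, ...).
    event file_new(f: fa_file)
        {
        local fname = fmt("extract-%s", f$id);
        Files::add_analyzer(f, Files::ANALYZER_EXTRACT, [$extract_filename=fname]);
        }

Extracted files then end up under the directory given by FileExtract::prefix, which the shared test script above redefines to "./".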
--- .../ftp.log | 20 ++++++++--------- .../ftp.log | 22 +++++++++---------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/testing/btest/Baseline/scripts.base.protocols.ftp.ftp-ipv4/ftp.log b/testing/btest/Baseline/scripts.base.protocols.ftp.ftp-ipv4/ftp.log index afa4c97830..b75d6955ba 100644 --- a/testing/btest/Baseline/scripts.base.protocols.ftp.ftp-ipv4/ftp.log +++ b/testing/btest/Baseline/scripts.base.protocols.ftp.ftp-ipv4/ftp.log @@ -3,13 +3,13 @@ #empty_field (empty) #unset_field - #path ftp -#open 2013-04-12-16-32-25 -#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p user password command arg mime_type file_size reply_code reply_msg tags data_channel.passive data_channel.orig_h data_channel.resp_h data_channel.resp_p extraction_file -#types time string addr port addr port string string string string string count count string table[string] bool addr addr port string -1329843175.680248 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test PASV - - - 227 Entering Passive Mode (199,233,217,249,221,90) (empty) T 141.142.220.235 199.233.217.249 56666 - -1329843179.815947 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test PASV - - - 227 Entering Passive Mode (199,233,217,249,221,91) (empty) T 141.142.220.235 199.233.217.249 56667 - -1329843179.926563 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test RETR ftp://199.233.217.249/./robots.txt text/plain 77 226 Transfer complete. (empty) - - - - - -1329843194.040188 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test PORT 141,142,220,235,131,46 - - 200 PORT command successful. (empty) F 199.233.217.249 141.142.220.235 33582 - -1329843197.672179 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test PORT 141,142,220,235,147,203 - - 200 PORT command successful. (empty) F 199.233.217.249 141.142.220.235 37835 - -1329843197.727769 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test RETR ftp://199.233.217.249/./robots.txt text/plain 77 226 Transfer complete. (empty) - - - - - -#close 2013-04-12-16-32-25 +#open 2013-07-26-20-37-01 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p user password command arg mime_type file_size reply_code reply_msg data_channel.passive data_channel.orig_h data_channel.resp_h data_channel.resp_p fuid +#types time string addr port addr port string string string string string count count string bool addr addr port string +1329843175.680248 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test PASV - - - 227 Entering Passive Mode (199,233,217,249,221,90) T 141.142.220.235 199.233.217.249 56666 - +1329843179.815947 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test PASV - - - 227 Entering Passive Mode (199,233,217,249,221,91) T 141.142.220.235 199.233.217.249 56667 - +1329843179.926563 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test RETR robots.txt text/plain 77 226 Transfer complete. - - - - 4VAnSiNGSQh +1329843194.040188 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test PORT 141,142,220,235,131,46 - - 200 PORT command successful. F 199.233.217.249 141.142.220.235 33582 4VAnSiNGSQh +1329843197.672179 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test PORT 141,142,220,235,147,203 - - 200 PORT command successful. F 199.233.217.249 141.142.220.235 37835 4VAnSiNGSQh +1329843197.727769 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test RETR robots.txt text/plain 77 226 Transfer complete. 
- - - - aJg8mtdsS86 +#close 2013-07-26-20-37-01 diff --git a/testing/btest/Baseline/scripts.base.protocols.ftp.ftp-ipv6/ftp.log b/testing/btest/Baseline/scripts.base.protocols.ftp.ftp-ipv6/ftp.log index 85207806c4..4177c52e1f 100644 --- a/testing/btest/Baseline/scripts.base.protocols.ftp.ftp-ipv6/ftp.log +++ b/testing/btest/Baseline/scripts.base.protocols.ftp.ftp-ipv6/ftp.log @@ -3,14 +3,14 @@ #empty_field (empty) #unset_field - #path ftp -#open 2013-04-12-16-32-25 -#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p user password command arg mime_type file_size reply_code reply_msg tags data_channel.passive data_channel.orig_h data_channel.resp_h data_channel.resp_p extraction_file -#types time string addr port addr port string string string string string count count string table[string] bool addr addr port string -1329327783.207785 UWkUyAuUGXf 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49185 2001:470:4867:99::21 21 anonymous test EPSV - - - 229 Entering Extended Passive Mode (|||57086|) (empty) T 2001:470:1f11:81f:c999:d94:aa7c:2e3e 2001:470:4867:99::21 57086 - -1329327786.415755 UWkUyAuUGXf 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49185 2001:470:4867:99::21 21 anonymous test EPSV - - - 229 Entering Extended Passive Mode (|||57087|) (empty) T 2001:470:1f11:81f:c999:d94:aa7c:2e3e 2001:470:4867:99::21 57087 - -1329327787.180814 UWkUyAuUGXf 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49185 2001:470:4867:99::21 21 anonymous test EPSV - - - 229 Entering Extended Passive Mode (|||57088|) (empty) T 2001:470:1f11:81f:c999:d94:aa7c:2e3e 2001:470:4867:99::21 57088 - -1329327787.396984 UWkUyAuUGXf 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49185 2001:470:4867:99::21 21 anonymous test RETR ftp://[2001:470:4867:99::21]/robots.txt - 77 226 Transfer complete. (empty) - - - - - -1329327795.355248 UWkUyAuUGXf 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49185 2001:470:4867:99::21 21 anonymous test EPRT |2|2001:470:1f11:81f:c999:d94:aa7c:2e3e|49189| - - 200 EPRT command successful. (empty) F 2001:470:4867:99::21 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49189 - -1329327795.463946 UWkUyAuUGXf 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49185 2001:470:4867:99::21 21 anonymous test RETR ftp://[2001:470:4867:99::21]/robots.txt - 77 226 Transfer complete. (empty) - - - - - -1329327799.799327 UWkUyAuUGXf 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49185 2001:470:4867:99::21 21 anonymous test EPRT |2|2001:470:1f11:81f:c999:d94:aa7c:2e3e|49190| - - 200 EPRT command successful. 
(empty) F 2001:470:4867:99::21 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49190 - -#close 2013-04-12-16-32-25 +#open 2013-07-26-20-37-22 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p user password command arg mime_type file_size reply_code reply_msg data_channel.passive data_channel.orig_h data_channel.resp_h data_channel.resp_p fuid +#types time string addr port addr port string string string string string count count string bool addr addr port string +1329327783.207785 UWkUyAuUGXf 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49185 2001:470:4867:99::21 21 anonymous test EPSV - - - 229 Entering Extended Passive Mode (|||57086|) T 2001:470:1f11:81f:c999:d94:aa7c:2e3e 2001:470:4867:99::21 57086 - +1329327786.415755 UWkUyAuUGXf 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49185 2001:470:4867:99::21 21 anonymous test EPSV - - - 229 Entering Extended Passive Mode (|||57087|) T 2001:470:1f11:81f:c999:d94:aa7c:2e3e 2001:470:4867:99::21 57087 - +1329327787.180814 UWkUyAuUGXf 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49185 2001:470:4867:99::21 21 anonymous test EPSV - - - 229 Entering Extended Passive Mode (|||57088|) T 2001:470:1f11:81f:c999:d94:aa7c:2e3e 2001:470:4867:99::21 57088 - +1329327787.396984 UWkUyAuUGXf 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49185 2001:470:4867:99::21 21 anonymous test RETR robots.txt - 77 226 Transfer complete. - - - - - +1329327795.355248 UWkUyAuUGXf 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49185 2001:470:4867:99::21 21 anonymous test EPRT |2|2001:470:1f11:81f:c999:d94:aa7c:2e3e|49189| - - 200 EPRT command successful. F 2001:470:4867:99::21 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49189 4YhNtGvCehl +1329327795.463946 UWkUyAuUGXf 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49185 2001:470:4867:99::21 21 anonymous test RETR robots.txt - 77 226 Transfer complete. - - - - 4YhNtGvCehl +1329327799.799327 UWkUyAuUGXf 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49185 2001:470:4867:99::21 21 anonymous test EPRT |2|2001:470:1f11:81f:c999:d94:aa7c:2e3e|49190| - - 200 EPRT command successful. F 2001:470:4867:99::21 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49190 4YhNtGvCehl +#close 2013-07-26-20-37-22 From 1238e5bcf2b6b05471a2b0599c75f9a9e6a4a5ed Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Fri, 26 Jul 2013 21:50:19 -0400 Subject: [PATCH 111/118] Undoing the FTP tests I updated earlier. - Fixed the external tests btest config too. 
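The change below re-adds URL construction for file-transfer commands (those in file_cmds), so the arg column of ftp.log again carries a full ftp:// URL rather than the bare path. The real helper is build_url_ftp() from scripts/base/protocols/ftp/utils.bro, whose body is not part of this patch; the following is only a rough sketch of the idea, and its field usage is an assumption rather than the actual implementation:

    # Sketch only: compose an ftp:// URL from the responder address and the
    # command argument. addr_to_uri() brackets IPv6 addresses, matching
    # baseline entries such as ftp://[2001:470:4867:99::21]/robots.txt.
    function sketch_build_url_ftp(rec: FTP::Info): string
        {
        return fmt("ftp://%s/%s", addr_to_uri(rec$id$resp_h), rec$cmdarg$arg);
        }

Judging from the restored baselines (e.g. ftp://199.233.217.249/./robots.txt), the real implementation also folds in the session's current working directory, which this sketch omits.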
--- scripts/base/protocols/ftp/main.bro | 8 +++++++- .../Baseline/scripts.base.protocols.ftp.ftp-ipv4/ftp.log | 8 ++++---- .../Baseline/scripts.base.protocols.ftp.ftp-ipv6/ftp.log | 8 ++++---- testing/external/subdir-btest.cfg | 2 +- 4 files changed, 16 insertions(+), 10 deletions(-) diff --git a/scripts/base/protocols/ftp/main.bro b/scripts/base/protocols/ftp/main.bro index f525c7792b..c9549a14ec 100644 --- a/scripts/base/protocols/ftp/main.bro +++ b/scripts/base/protocols/ftp/main.bro @@ -102,6 +102,8 @@ export { global log_ftp: event(rec: Info); } +@load ./utils + # Add the state tracking information variable to the connection record redef record connection += { ftp: Info &optional; @@ -171,7 +173,11 @@ function ftp_message(s: Info) { s$ts=s$cmdarg$ts; s$command=s$cmdarg$cmd; - s$arg=s$cmdarg$arg; + + s$arg = s$cmdarg$arg; + if ( s$cmdarg$cmd in file_cmds ) + s$arg = build_url_ftp(s); + if ( s$arg == "" ) delete s$arg; diff --git a/testing/btest/Baseline/scripts.base.protocols.ftp.ftp-ipv4/ftp.log b/testing/btest/Baseline/scripts.base.protocols.ftp.ftp-ipv4/ftp.log index b75d6955ba..4cc6d67761 100644 --- a/testing/btest/Baseline/scripts.base.protocols.ftp.ftp-ipv4/ftp.log +++ b/testing/btest/Baseline/scripts.base.protocols.ftp.ftp-ipv4/ftp.log @@ -3,13 +3,13 @@ #empty_field (empty) #unset_field - #path ftp -#open 2013-07-26-20-37-01 +#open 2013-07-27-01-49-02 #fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p user password command arg mime_type file_size reply_code reply_msg data_channel.passive data_channel.orig_h data_channel.resp_h data_channel.resp_p fuid #types time string addr port addr port string string string string string count count string bool addr addr port string 1329843175.680248 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test PASV - - - 227 Entering Passive Mode (199,233,217,249,221,90) T 141.142.220.235 199.233.217.249 56666 - 1329843179.815947 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test PASV - - - 227 Entering Passive Mode (199,233,217,249,221,91) T 141.142.220.235 199.233.217.249 56667 - -1329843179.926563 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test RETR robots.txt text/plain 77 226 Transfer complete. - - - - 4VAnSiNGSQh +1329843179.926563 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test RETR ftp://199.233.217.249/./robots.txt text/plain 77 226 Transfer complete. - - - - 4VAnSiNGSQh 1329843194.040188 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test PORT 141,142,220,235,131,46 - - 200 PORT command successful. F 199.233.217.249 141.142.220.235 33582 4VAnSiNGSQh 1329843197.672179 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test PORT 141,142,220,235,147,203 - - 200 PORT command successful. F 199.233.217.249 141.142.220.235 37835 4VAnSiNGSQh -1329843197.727769 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test RETR robots.txt text/plain 77 226 Transfer complete. - - - - aJg8mtdsS86 -#close 2013-07-26-20-37-01 +1329843197.727769 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test RETR ftp://199.233.217.249/./robots.txt text/plain 77 226 Transfer complete. 
- - - - aJg8mtdsS86 +#close 2013-07-27-01-49-02 diff --git a/testing/btest/Baseline/scripts.base.protocols.ftp.ftp-ipv6/ftp.log b/testing/btest/Baseline/scripts.base.protocols.ftp.ftp-ipv6/ftp.log index 4177c52e1f..d6f57bcf45 100644 --- a/testing/btest/Baseline/scripts.base.protocols.ftp.ftp-ipv6/ftp.log +++ b/testing/btest/Baseline/scripts.base.protocols.ftp.ftp-ipv6/ftp.log @@ -3,14 +3,14 @@ #empty_field (empty) #unset_field - #path ftp -#open 2013-07-26-20-37-22 +#open 2013-07-27-01-49-13 #fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p user password command arg mime_type file_size reply_code reply_msg data_channel.passive data_channel.orig_h data_channel.resp_h data_channel.resp_p fuid #types time string addr port addr port string string string string string count count string bool addr addr port string 1329327783.207785 UWkUyAuUGXf 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49185 2001:470:4867:99::21 21 anonymous test EPSV - - - 229 Entering Extended Passive Mode (|||57086|) T 2001:470:1f11:81f:c999:d94:aa7c:2e3e 2001:470:4867:99::21 57086 - 1329327786.415755 UWkUyAuUGXf 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49185 2001:470:4867:99::21 21 anonymous test EPSV - - - 229 Entering Extended Passive Mode (|||57087|) T 2001:470:1f11:81f:c999:d94:aa7c:2e3e 2001:470:4867:99::21 57087 - 1329327787.180814 UWkUyAuUGXf 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49185 2001:470:4867:99::21 21 anonymous test EPSV - - - 229 Entering Extended Passive Mode (|||57088|) T 2001:470:1f11:81f:c999:d94:aa7c:2e3e 2001:470:4867:99::21 57088 - -1329327787.396984 UWkUyAuUGXf 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49185 2001:470:4867:99::21 21 anonymous test RETR robots.txt - 77 226 Transfer complete. - - - - - +1329327787.396984 UWkUyAuUGXf 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49185 2001:470:4867:99::21 21 anonymous test RETR ftp://[2001:470:4867:99::21]/robots.txt - 77 226 Transfer complete. - - - - - 1329327795.355248 UWkUyAuUGXf 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49185 2001:470:4867:99::21 21 anonymous test EPRT |2|2001:470:1f11:81f:c999:d94:aa7c:2e3e|49189| - - 200 EPRT command successful. F 2001:470:4867:99::21 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49189 4YhNtGvCehl -1329327795.463946 UWkUyAuUGXf 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49185 2001:470:4867:99::21 21 anonymous test RETR robots.txt - 77 226 Transfer complete. - - - - 4YhNtGvCehl +1329327795.463946 UWkUyAuUGXf 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49185 2001:470:4867:99::21 21 anonymous test RETR ftp://[2001:470:4867:99::21]/robots.txt - 77 226 Transfer complete. - - - - 4YhNtGvCehl 1329327799.799327 UWkUyAuUGXf 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49185 2001:470:4867:99::21 21 anonymous test EPRT |2|2001:470:1f11:81f:c999:d94:aa7c:2e3e|49190| - - 200 EPRT command successful. 
F 2001:470:4867:99::21 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49190 4YhNtGvCehl -#close 2013-07-26-20-37-22 +#close 2013-07-27-01-49-13 diff --git a/testing/external/subdir-btest.cfg b/testing/external/subdir-btest.cfg index 31fce50adc..fb5873418a 100644 --- a/testing/external/subdir-btest.cfg +++ b/testing/external/subdir-btest.cfg @@ -7,7 +7,7 @@ IgnoreFiles = *.tmp *.swp #* *.trace .gitignore *.skeleton [environment] BROPATH=`bash -c %(testbase)s/../../../build/bro-path-dev`:%(testbase)s/../scripts -BROMAGIC=%(testbase)s/../../magic/database +BROMAGIC=%(testbase)s/../../../magic/database BRO_SEED_FILE=%(testbase)s/../random.seed TZ=UTC LC_ALL=C From 32f1c736f7d425b0d03deb93d5d057075737c3c1 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Mon, 29 Jul 2013 16:40:16 -0400 Subject: [PATCH 112/118] Some script reorg and a new intel extension script. - policy/frameworks/intel/seen is the new location for the scripts that push data into the intel framework for checking. - The new policy/frameworks/intel/do_notice script adds an example mechanism for data driven notices. --- doc/intel.rst | 4 +- doc/scripts/DocSourcesList.cmake | 19 ++++---- scripts/base/frameworks/intel/main.bro | 3 -- scripts/policy/frameworks/intel/do_notice.bro | 44 +++++++++++++++++++ .../frameworks/intel/{ => seen}/__load__.bro | 0 .../intel/{ => seen}/conn-established.bro | 0 .../frameworks/intel/{ => seen}/dns.bro | 0 .../intel/{ => seen}/http-host-header.bro | 0 .../frameworks/intel/{ => seen}/http-url.bro | 0 .../intel/{ => seen}/http-user-agents.bro | 0 .../intel/{ => seen}/smtp-url-extraction.bro | 0 .../frameworks/intel/{ => seen}/smtp.bro | 0 .../frameworks/intel/{ => seen}/ssl.bro | 0 .../intel/{ => seen}/where-locations.bro | 0 scripts/test-all-policy.bro | 21 ++++----- 15 files changed, 67 insertions(+), 24 deletions(-) create mode 100644 scripts/policy/frameworks/intel/do_notice.bro rename scripts/policy/frameworks/intel/{ => seen}/__load__.bro (100%) rename scripts/policy/frameworks/intel/{ => seen}/conn-established.bro (100%) rename scripts/policy/frameworks/intel/{ => seen}/dns.bro (100%) rename scripts/policy/frameworks/intel/{ => seen}/http-host-header.bro (100%) rename scripts/policy/frameworks/intel/{ => seen}/http-url.bro (100%) rename scripts/policy/frameworks/intel/{ => seen}/http-user-agents.bro (100%) rename scripts/policy/frameworks/intel/{ => seen}/smtp-url-extraction.bro (100%) rename scripts/policy/frameworks/intel/{ => seen}/smtp.bro (100%) rename scripts/policy/frameworks/intel/{ => seen}/ssl.bro (100%) rename scripts/policy/frameworks/intel/{ => seen}/where-locations.bro (100%) diff --git a/doc/intel.rst b/doc/intel.rst index 2a59a98974..787524a417 100644 --- a/doc/intel.rst +++ b/doc/intel.rst @@ -27,7 +27,7 @@ Quick Start Load the package of scripts that sends data into the Intelligence Framework to be checked by loading this script in local.bro:: - @load policy/frameworks/intel + @load policy/frameworks/intel/seen Refer to the "Loading Intelligence" section below to see the format for Intelligence Framework text files, then load those text files with @@ -100,7 +100,7 @@ The full package of hook scripts that Bro ships with for sending this "seen" data into the intelligence framework can be loading by adding this line to local.bro:: - @load policy/frameworks/intel + @load policy/frameworks/intel/seen Intelligence Matches ******************** diff --git a/doc/scripts/DocSourcesList.cmake b/doc/scripts/DocSourcesList.cmake index 26a88027ef..f507172161 100644 --- a/doc/scripts/DocSourcesList.cmake 
+++ b/doc/scripts/DocSourcesList.cmake @@ -183,15 +183,16 @@ rest_target(${psd} policy/frameworks/control/controllee.bro) rest_target(${psd} policy/frameworks/control/controller.bro) rest_target(${psd} policy/frameworks/dpd/detect-protocols.bro) rest_target(${psd} policy/frameworks/dpd/packet-segment-logging.bro) -rest_target(${psd} policy/frameworks/intel/conn-established.bro) -rest_target(${psd} policy/frameworks/intel/dns.bro) -rest_target(${psd} policy/frameworks/intel/http-host-header.bro) -rest_target(${psd} policy/frameworks/intel/http-url.bro) -rest_target(${psd} policy/frameworks/intel/http-user-agents.bro) -rest_target(${psd} policy/frameworks/intel/smtp-url-extraction.bro) -rest_target(${psd} policy/frameworks/intel/smtp.bro) -rest_target(${psd} policy/frameworks/intel/ssl.bro) -rest_target(${psd} policy/frameworks/intel/where-locations.bro) +rest_target(${psd} policy/frameworks/intel/do_notice.bro) +rest_target(${psd} policy/frameworks/intel/seen/conn-established.bro) +rest_target(${psd} policy/frameworks/intel/seen/dns.bro) +rest_target(${psd} policy/frameworks/intel/seen/http-host-header.bro) +rest_target(${psd} policy/frameworks/intel/seen/http-url.bro) +rest_target(${psd} policy/frameworks/intel/seen/http-user-agents.bro) +rest_target(${psd} policy/frameworks/intel/seen/smtp-url-extraction.bro) +rest_target(${psd} policy/frameworks/intel/seen/smtp.bro) +rest_target(${psd} policy/frameworks/intel/seen/ssl.bro) +rest_target(${psd} policy/frameworks/intel/seen/where-locations.bro) rest_target(${psd} policy/frameworks/packet-filter/shunt.bro) rest_target(${psd} policy/frameworks/software/version-changes.bro) rest_target(${psd} policy/frameworks/software/vulnerable.bro) diff --git a/scripts/base/frameworks/intel/main.bro b/scripts/base/frameworks/intel/main.bro index 1b740f538d..a201a7a041 100644 --- a/scripts/base/frameworks/intel/main.bro +++ b/scripts/base/frameworks/intel/main.bro @@ -63,9 +63,6 @@ export { IN_ANYWHERE, }; - ## The $host field and combination of $str and $str_type fields are mutually - ## exclusive. These records *must* represent either an IP address being - ## seen or a string being seen. type Seen: record { ## The string if the data is about a string. indicator: string &log &optional; diff --git a/scripts/policy/frameworks/intel/do_notice.bro b/scripts/policy/frameworks/intel/do_notice.bro new file mode 100644 index 0000000000..720e29c35c --- /dev/null +++ b/scripts/policy/frameworks/intel/do_notice.bro @@ -0,0 +1,44 @@ + +@load base/frameworks/intel +@load base/frameworks/notice + +module Intel; + +export { + redef enum Notice::Type += { + ## Intel::Notice is a notice that happens when an intelligence + ## indicator is denoted to be notice-worthy. + Intel::Notice + }; + + redef record Intel::MetaData += { + ## A boolean value to allow the data itself to represent + ## if the indicator that this metadata is attached to + ## is notice worthy. + do_notice: bool &default=F; + + ## Restrictions on when notices are created to only create + ## them if the do_notice field is T and the notice was + ## seen in the indicated location. + if_in: Intel::Where &optional; + }; +} + +event Intel::match(s: Seen, items: set[Item]) + { + for ( item in items ) + { + if ( item$meta$do_notice && + (! 
item$meta?$if_in || s$where == item$meta$if_in) ) + { + local n = Notice::Info($note=Intel::Notice, + $msg=fmt("Intel hit on %s at %s", s$indicator, s$where), + $sub=s$indicator); + + if ( s?$conn ) + n$conn = s$conn; + + NOTICE(n); + } + } + } diff --git a/scripts/policy/frameworks/intel/__load__.bro b/scripts/policy/frameworks/intel/seen/__load__.bro similarity index 100% rename from scripts/policy/frameworks/intel/__load__.bro rename to scripts/policy/frameworks/intel/seen/__load__.bro diff --git a/scripts/policy/frameworks/intel/conn-established.bro b/scripts/policy/frameworks/intel/seen/conn-established.bro similarity index 100% rename from scripts/policy/frameworks/intel/conn-established.bro rename to scripts/policy/frameworks/intel/seen/conn-established.bro diff --git a/scripts/policy/frameworks/intel/dns.bro b/scripts/policy/frameworks/intel/seen/dns.bro similarity index 100% rename from scripts/policy/frameworks/intel/dns.bro rename to scripts/policy/frameworks/intel/seen/dns.bro diff --git a/scripts/policy/frameworks/intel/http-host-header.bro b/scripts/policy/frameworks/intel/seen/http-host-header.bro similarity index 100% rename from scripts/policy/frameworks/intel/http-host-header.bro rename to scripts/policy/frameworks/intel/seen/http-host-header.bro diff --git a/scripts/policy/frameworks/intel/http-url.bro b/scripts/policy/frameworks/intel/seen/http-url.bro similarity index 100% rename from scripts/policy/frameworks/intel/http-url.bro rename to scripts/policy/frameworks/intel/seen/http-url.bro diff --git a/scripts/policy/frameworks/intel/http-user-agents.bro b/scripts/policy/frameworks/intel/seen/http-user-agents.bro similarity index 100% rename from scripts/policy/frameworks/intel/http-user-agents.bro rename to scripts/policy/frameworks/intel/seen/http-user-agents.bro diff --git a/scripts/policy/frameworks/intel/smtp-url-extraction.bro b/scripts/policy/frameworks/intel/seen/smtp-url-extraction.bro similarity index 100% rename from scripts/policy/frameworks/intel/smtp-url-extraction.bro rename to scripts/policy/frameworks/intel/seen/smtp-url-extraction.bro diff --git a/scripts/policy/frameworks/intel/smtp.bro b/scripts/policy/frameworks/intel/seen/smtp.bro similarity index 100% rename from scripts/policy/frameworks/intel/smtp.bro rename to scripts/policy/frameworks/intel/seen/smtp.bro diff --git a/scripts/policy/frameworks/intel/ssl.bro b/scripts/policy/frameworks/intel/seen/ssl.bro similarity index 100% rename from scripts/policy/frameworks/intel/ssl.bro rename to scripts/policy/frameworks/intel/seen/ssl.bro diff --git a/scripts/policy/frameworks/intel/where-locations.bro b/scripts/policy/frameworks/intel/seen/where-locations.bro similarity index 100% rename from scripts/policy/frameworks/intel/where-locations.bro rename to scripts/policy/frameworks/intel/seen/where-locations.bro diff --git a/scripts/test-all-policy.bro b/scripts/test-all-policy.bro index 1fd34d6f2f..809fc1d1ec 100644 --- a/scripts/test-all-policy.bro +++ b/scripts/test-all-policy.bro @@ -14,16 +14,17 @@ # @load frameworks/control/controller.bro @load frameworks/dpd/detect-protocols.bro @load frameworks/dpd/packet-segment-logging.bro -@load frameworks/intel/__load__.bro -@load frameworks/intel/conn-established.bro -@load frameworks/intel/dns.bro -@load frameworks/intel/http-host-header.bro -@load frameworks/intel/http-url.bro -@load frameworks/intel/http-user-agents.bro -@load frameworks/intel/smtp-url-extraction.bro -@load frameworks/intel/smtp.bro -@load frameworks/intel/ssl.bro -@load 
frameworks/intel/where-locations.bro +@load frameworks/intel/do_notice.bro +@load frameworks/intel/seen/__load__.bro +@load frameworks/intel/seen/conn-established.bro +@load frameworks/intel/seen/dns.bro +@load frameworks/intel/seen/http-host-header.bro +@load frameworks/intel/seen/http-url.bro +@load frameworks/intel/seen/http-user-agents.bro +@load frameworks/intel/seen/smtp-url-extraction.bro +@load frameworks/intel/seen/smtp.bro +@load frameworks/intel/seen/ssl.bro +@load frameworks/intel/seen/where-locations.bro @load frameworks/packet-filter/shunt.bro @load frameworks/software/version-changes.bro @load frameworks/software/vulnerable.bro From 64fc80d7e4a4c1a653a16bf3d3892c50982fcffa Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Thu, 25 Jul 2013 13:31:57 -0700 Subject: [PATCH 113/118] Adding a trace with a DNSKEY RR. Still had this sitting in my inbox, but seems Bro is doing everything right. --- CHANGES | 4 ++++ VERSION | 2 +- .../scripts.base.protocols.dns.dns-key/dns.log | 10 ++++++++++ testing/btest/Traces/dns-dnskey.trace | Bin 0 -> 1110 bytes .../btest/scripts/base/protocols/dns/dns-key.bro | 4 ++++ 5 files changed, 19 insertions(+), 1 deletion(-) create mode 100644 testing/btest/Baseline/scripts.base.protocols.dns.dns-key/dns.log create mode 100644 testing/btest/Traces/dns-dnskey.trace create mode 100644 testing/btest/scripts/base/protocols/dns/dns-key.bro diff --git a/CHANGES b/CHANGES index f4b7e43a7e..0c7235bd47 100644 --- a/CHANGES +++ b/CHANGES @@ -1,4 +1,8 @@ +2.1-895 | 2013-07-29 14:07:35 -0700 + + * Adding a test for a DNSKEY RR. (Robin Sommer) + 2.1-894 | 2013-07-29 16:44:41 -0400 * Updates for the Intel Framework. (Seth Hall) diff --git a/VERSION b/VERSION index 3131a2159f..9e4a84ae0a 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.1-894 +2.1-895 diff --git a/testing/btest/Baseline/scripts.base.protocols.dns.dns-key/dns.log b/testing/btest/Baseline/scripts.base.protocols.dns.dns-key/dns.log new file mode 100644 index 0000000000..722d2c3912 --- /dev/null +++ b/testing/btest/Baseline/scripts.base.protocols.dns.dns-key/dns.log @@ -0,0 +1,10 @@ +#separator \x09 +#set_separator , +#empty_field (empty) +#unset_field - +#path dns +#open 2013-07-25-20-29-44 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p proto trans_id query qclass qclass_name qtype qtype_name rcode rcode_name AA TC RD RA Z answers TTLs rejected +#types time string addr port addr port enum count string count string count string count string bool bool bool bool count vector[string] vector[interval] bool +1359565680.761790 UWkUyAuUGXf 192.168.6.10 53209 192.168.129.36 53 udp 41477 paypal.com 1 C_INTERNET 48 DNSKEY 0 NOERROR F F T F 1 - - F +#close 2013-07-25-20-29-44 diff --git a/testing/btest/Traces/dns-dnskey.trace b/testing/btest/Traces/dns-dnskey.trace new file mode 100644 index 0000000000000000000000000000000000000000..c7a6448e7990c6717c5f8da73cb1c300bcb4f534 GIT binary patch literal 1110 zcmca|c+)~A1{MYw`2U}Qff2|l2<8mjH=mm!5Xc5$1_qw_pW+x)Crs=x;$U!PU~s++ zROui%Py4_MHm(CJ8dc8UWH4pWcg2l zAU9)JaDWG54dZb}4n{_1R%S*9#;dtA^fp+P2bQoGp4N|geL&lL*-Vp7o5g}J zOiDO>LZzj#t#jUXgDH3N+L@~@Bs=(DJ$&)9_txXvJ9M5Uv@@M)`XV5jrn9D|M{BEK zj=kgJuFnx?+O1ZkHpCv_)Yx<#=q5c-a$r2pFpt52m5G6YDI!ogN+96($re3jDJs_1%i&kO`s>{Z8M|mvw@lF;N;3?2080u$S?4>&_C<&yL@pE_lvLnHLi$pWcCLAeH59Sd_SVCU72I2Nv_F-U(Y3u zuF+j~iP2hWx@U{{ansb?GncbZ>{+uk<(5eOq;(DongKd%=l!cz_v1ZwpL^2O!*+=_ zQNB;P&ZV6-%1D@y_C%nUKX3u>mkPzOzfFq-YEE7~c%^ENUE}|3m&h%;YpkYC^3+iV LW^UFVNah9rRu9Wl literal 0 HcmV?d00001 diff --git a/testing/btest/scripts/base/protocols/dns/dns-key.bro 
b/testing/btest/scripts/base/protocols/dns/dns-key.bro new file mode 100644 index 0000000000..c51788c605 --- /dev/null +++ b/testing/btest/scripts/base/protocols/dns/dns-key.bro @@ -0,0 +1,4 @@ +# Making sure DNSKEY gets logged as such. +# +# @TEST-EXEC: bro -r $TRACES/dns-dnskey.trace +# @TEST-EXEC: btest-diff dns.log From c7676c5e695b0a4590a2fa18e96241455ff4970e Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Mon, 29 Jul 2013 14:29:45 -0700 Subject: [PATCH 114/118] The new magic submodule didn't get merged. --- magic | 1 + 1 file changed, 1 insertion(+) create mode 160000 magic diff --git a/magic b/magic new file mode 160000 index 0000000000..e87fe13a7b --- /dev/null +++ b/magic @@ -0,0 +1 @@ +Subproject commit e87fe13a7b776182ffc8c75076d42702f5c28fed From b76d1d07ca0d0175f57f83379612009c8c09400a Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Mon, 29 Jul 2013 15:06:07 -0700 Subject: [PATCH 115/118] Test updates. BIT-1044 #merged --- CHANGES | 40 +++++++++++++++++++ NEWS | 2 +- VERSION | 2 +- .../canonified_loaded_scripts.log | 5 ++- .../canonified_loaded_scripts.log | 7 ++-- .../http.ds.txt | 18 ++++----- testing/btest/coverage/bare-mode-errors.test | 5 ++- 7 files changed, 62 insertions(+), 17 deletions(-) diff --git a/CHANGES b/CHANGES index 0c7235bd47..1f64cc908a 100644 --- a/CHANGES +++ b/CHANGES @@ -1,4 +1,44 @@ +2.1-930 | 2013-07-29 15:06:07 -0700 + + * Major file analysis overhaul in naming and appearance, along with + fixes and test updates. (Seth Hall and Jon Siwek) + + Includes: + + * Added protocol description functions that provide a super + compressed log representation. (Seth Hall) + + * Added mime types to http.log (Seth Hall) + + * Add jar files to the default MHR lookups. (Seth Hall) + + * Adding CAB files for MHR checking. (Seth Hall) + + * Improve malware hash registry script. + + - Include a link to a virustotal search in the notice sub message field. + - Give all information returned from Team Cymru in the notice message. + - Add more file types to match on to the default set. + + * Make the custom libmagic database a git submodule. + + * Add an is_orig parameter to file_over_new_connection event. + + * Recorrected the module name to Files. + + * Added Files::analyzer_name to get a more readable name for a + file analyzer. + + * Improved and just overall better handled multipart mime + transfers in HTTP and SMTP. HTTP now has orig_fuids and + resp_fuids log fields since multiple "files" can be transferred + with multipart mime in a single request/response pair. SMTP has + an fuids field which has file unique IDs for all parts + transferred. FTP and IRC have a log field named fuid added + because only a single file can be transferred per irc and ftp + log line. + 2.1-895 | 2013-07-29 14:07:35 -0700 * Adding a test for a DNSKEY RR. (Robin Sommer) diff --git a/NEWS b/NEWS index c3eabf5554..de2ee1b684 100644 --- a/NEWS +++ b/NEWS @@ -80,7 +80,7 @@ New Functionality with the following user-visibible functionality (some of that was already available before, but done differently): - [TODO: This will probably change with further script updates.] + [TODO: Update with changes from 984e9793db56.] 
- A binary input reader interfaces the input framework with file analysis, allowing to inject files on disk into Bro's diff --git a/VERSION b/VERSION index 9e4a84ae0a..cacffbfffc 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.1-895 +2.1-930 diff --git a/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log b/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log index 0caafdf107..e28efc9563 100644 --- a/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log +++ b/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log @@ -3,7 +3,7 @@ #empty_field (empty) #unset_field - #path loaded_scripts -#open 2013-07-25-17-54-33 +#open 2013-07-29-21-31-47 #fields name #types string scripts/base/init-bare.bro @@ -90,6 +90,7 @@ scripts/base/init-bare.bro build/scripts/base/bif/file_analysis.bif.bro scripts/base/utils/site.bro scripts/base/utils/patterns.bro + build/scripts/base/bif/__load__.bro scripts/policy/misc/loaded-scripts.bro scripts/base/utils/paths.bro -#close 2013-07-25-19-59-47 +#close 2013-07-29-21-31-47 diff --git a/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log b/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log index deffbe364b..faf372222b 100644 --- a/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log +++ b/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log @@ -3,7 +3,7 @@ #empty_field (empty) #unset_field - #path loaded_scripts -#open 2013-07-23-05-48-10 +#open 2013-07-29-21-31-48 #fields name #types string scripts/base/init-bare.bro @@ -90,6 +90,7 @@ scripts/base/init-bare.bro build/scripts/base/bif/file_analysis.bif.bro scripts/base/utils/site.bro scripts/base/utils/patterns.bro + build/scripts/base/bif/__load__.bro scripts/base/init-default.bro scripts/base/utils/addrs.bro scripts/base/utils/conn-ids.bro @@ -158,7 +159,7 @@ scripts/base/init-default.bro scripts/base/protocols/ftp/__load__.bro scripts/base/protocols/ftp/utils-commands.bro scripts/base/protocols/ftp/main.bro - scripts/base/protocols/ftp/utils.bro + scripts/base/protocols/ftp/utils.bro scripts/base/protocols/ftp/files.bro scripts/base/protocols/ftp/gridftp.bro scripts/base/protocols/ssl/__load__.bro @@ -197,4 +198,4 @@ scripts/base/init-default.bro scripts/base/files/extract/main.bro scripts/base/misc/find-checksum-offloading.bro scripts/policy/misc/loaded-scripts.bro -#close 2013-07-23-05-48-10 +#close 2013-07-29-21-31-48 diff --git a/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.wikipedia/http.ds.txt b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.wikipedia/http.ds.txt index e919233b79..fd998057f3 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.wikipedia/http.ds.txt +++ b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.wikipedia/http.ds.txt @@ -32,10 +32,10 @@ - - - - + + + + @@ -60,13 +60,13 @@ - - - - + + + + # Extent, type='http' -ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied mime_type md5 extracted_request_files extracted_response_files +ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags 
username password proxied orig_fuids orig_mime_types resp_fuids resp_mime_types 1300475168.784020 j4u32Pc5bif 141.142.220.118 48649 208.80.152.118 80 1 GET bits.wikimedia.org /skins-1.5/monobook/main.css http://www.wikipedia.org/ Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.15) Gecko/20110303 Ubuntu/10.04 (lucid) Firefox/3.6.15 0 0 304 Not Modified 0 1300475168.916018 VW0XPVINV8a 141.142.220.118 49997 208.80.152.3 80 1 GET upload.wikimedia.org /wikipedia/commons/6/63/Wikipedia-logo.png http://www.wikipedia.org/ Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.15) Gecko/20110303 Ubuntu/10.04 (lucid) Firefox/3.6.15 0 0 304 Not Modified 0 1300475168.916183 3PKsZ2Uye21 141.142.220.118 49996 208.80.152.3 80 1 GET upload.wikimedia.org /wikipedia/commons/thumb/b/bb/Wikipedia_wordmark.svg/174px-Wikipedia_wordmark.svg.png http://www.wikipedia.org/ Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.15) Gecko/20110303 Ubuntu/10.04 (lucid) Firefox/3.6.15 0 0 304 Not Modified 0 diff --git a/testing/btest/coverage/bare-mode-errors.test b/testing/btest/coverage/bare-mode-errors.test index 34ba063081..1910ef8e17 100644 --- a/testing/btest/coverage/bare-mode-errors.test +++ b/testing/btest/coverage/bare-mode-errors.test @@ -10,5 +10,8 @@ # # @TEST-EXEC: test -d $DIST/scripts # @TEST-EXEC: for script in `find $DIST/scripts/ -name \*\.bro -not -path '*/site/*'`; do echo "=== $script" >>allerrors; if echo "$script" | egrep -q 'communication/listen|controllee'; then rm -rf load_attempt .bgprocs; btest-bg-run load_attempt bro -b $script; btest-bg-wait -k 2; cat load_attempt/.stderr >>allerrors; else bro -b $script 2>>allerrors; fi done || exit 0 -# @TEST-EXEC: cat allerrors | grep -v "received termination signal" | grep -v '===' | sort | uniq > unique_errors +# @TEST-EXEC: cat allerrors | grep -v "received termination signal" | fgrep -v -f %INPUT | grep -v '===' | sort | uniq > unique_errors # @TEST-EXEC: btest-diff unique_errors + +# White-list of tests to exclude because of cyclic load dependencies. +scripts/base/protocols/ftp/utils.bro From c30fa36d14382c03d08f545002a33f21eb778cfe Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Mon, 29 Jul 2013 16:39:40 -0700 Subject: [PATCH 116/118] Updating submodule(s). [nomail] --- aux/binpac | 2 +- aux/bro-aux | 2 +- aux/broccoli | 2 +- aux/broctl | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/aux/binpac b/aux/binpac index 896ddedde5..314fa8f65f 160000 --- a/aux/binpac +++ b/aux/binpac @@ -1 +1 @@ -Subproject commit 896ddedde55c48ec2163577fc258b49c418abb3e +Subproject commit 314fa8f65fc240e960c23c3bba98623436a72b98 diff --git a/aux/bro-aux b/aux/bro-aux index a9942558c7..91d258cc8b 160000 --- a/aux/bro-aux +++ b/aux/bro-aux @@ -1 +1 @@ -Subproject commit a9942558c7d3dfd80148b8aaded64c82ade3d117 +Subproject commit 91d258cc8b2f74cd02fc93dfe61f73ec9f0dd489 diff --git a/aux/broccoli b/aux/broccoli index 889f9c6594..d59c73b6e0 160000 --- a/aux/broccoli +++ b/aux/broccoli @@ -1 +1 @@ -Subproject commit 889f9c65944ceac20ad9230efc39d33e6e1221c3 +Subproject commit d59c73b6e0966ad63bbc63a35741b5f68263e7b1 diff --git a/aux/broctl b/aux/broctl index 0cd102805e..52fd91261f 160000 --- a/aux/broctl +++ b/aux/broctl @@ -1 +1 @@ -Subproject commit 0cd102805e73343cab3f9fd4a76552e13940dad9 +Subproject commit 52fd91261f41fa1528f7b964837a364d7991889e From 43825212db25ce540c6a12905844d246f8784c05 Mon Sep 17 00:00:00 2001 From: Matthias Vallentin Date: Tue, 30 Jul 2013 12:17:53 +0200 Subject: [PATCH 117/118] Update submodules. 
--- aux/binpac | 2 +- aux/bro-aux | 2 +- aux/broccoli | 2 +- aux/broctl | 2 +- cmake | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/aux/binpac b/aux/binpac index c39bd478b9..314fa8f65f 160000 --- a/aux/binpac +++ b/aux/binpac @@ -1 +1 @@ -Subproject commit c39bd478b9d0ecd05b1b83aa9d09a7887893977c +Subproject commit 314fa8f65fc240e960c23c3bba98623436a72b98 diff --git a/aux/bro-aux b/aux/bro-aux index a9942558c7..91d258cc8b 160000 --- a/aux/bro-aux +++ b/aux/bro-aux @@ -1 +1 @@ -Subproject commit a9942558c7d3dfd80148b8aaded64c82ade3d117 +Subproject commit 91d258cc8b2f74cd02fc93dfe61f73ec9f0dd489 diff --git a/aux/broccoli b/aux/broccoli index 889f9c6594..d59c73b6e0 160000 --- a/aux/broccoli +++ b/aux/broccoli @@ -1 +1 @@ -Subproject commit 889f9c65944ceac20ad9230efc39d33e6e1221c3 +Subproject commit d59c73b6e0966ad63bbc63a35741b5f68263e7b1 diff --git a/aux/broctl b/aux/broctl index 0cd102805e..52fd91261f 160000 --- a/aux/broctl +++ b/aux/broctl @@ -1 +1 @@ -Subproject commit 0cd102805e73343cab3f9fd4a76552e13940dad9 +Subproject commit 52fd91261f41fa1528f7b964837a364d7991889e diff --git a/cmake b/cmake index 0187b33a29..026639f836 160000 --- a/cmake +++ b/cmake @@ -1 +1 @@ -Subproject commit 0187b33a29d5ec824f940feff60dc5d8c2fe314f +Subproject commit 026639f8368e56742c0cb5d9fb390ea64e60ec50 From af9e181731b82167187b7a9ec8995b991920c0e1 Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Tue, 30 Jul 2013 10:29:27 -0700 Subject: [PATCH 118/118] Updating submodule(s). [nomail] --- magic | 1 + 1 file changed, 1 insertion(+) create mode 160000 magic diff --git a/magic b/magic new file mode 160000 index 0000000000..e87fe13a7b --- /dev/null +++ b/magic @@ -0,0 +1 @@ +Subproject commit e87fe13a7b776182ffc8c75076d42702f5c28fed
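Patches 114 and 118 above record the new magic submodule only as a gitlink pointing at commit e87fe13a7b776182ffc8c75076d42702f5c28fed, so a fresh checkout will not contain the submodule's contents until it is initialized. The following is a minimal sketch of the usual commands, assuming a matching .gitmodules entry is already present in the tree; these are standard git commands, not part of the patches themselves:

    # Fetch and check out the magic submodule at the recorded commit.
    git submodule update --init magic

    # Confirm the working tree matches the gitlink recorded by the patch.
    git submodule status magic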