diff --git a/CHANGES b/CHANGES index 1f64cc908a..3d30b3d195 100644 --- a/CHANGES +++ b/CHANGES @@ -1,4 +1,14 @@ +2.1-939 | 2013-07-29 15:42:38 -0700 + + * Added Exec, Dir, and ActiveHTTP modules. (Seth Hall) + + base/utils/exec.bro provides a module to start external processes + asynchronously and retrieve their output on termination. + base/utils/dir.bro uses it to monitor a directory for changes, and + base/utils/active-http.bro for providing an interface for querying + remote web servers. + 2.1-930 | 2013-07-29 15:06:07 -0700 * Major file analysis overhaul in naming and appearance, along with diff --git a/NEWS b/NEWS index de2ee1b684..c421e7d675 100644 --- a/NEWS +++ b/NEWS @@ -121,6 +121,12 @@ New Functionality See for full documentation. +- base/utils/exec.bro provides a module to start external processes + asynchronously and retrieve their output on termination. + base/utils/dir.bro uses it to monitor a directory for changes, and + base/utils/active-http.bro for providing an interface for querying + remote web servers. 
+ Changed Functionality ~~~~~~~~~~~~~~~~~~~~~ diff --git a/VERSION b/VERSION index cacffbfffc..4ebbf81a9f 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.1-930 +2.1-939 diff --git a/doc/scripts/DocSourcesList.cmake b/doc/scripts/DocSourcesList.cmake index c5b3f7cbbb..2efa45ef38 100644 --- a/doc/scripts/DocSourcesList.cmake +++ b/doc/scripts/DocSourcesList.cmake @@ -164,9 +164,12 @@ rest_target(${psd} base/protocols/ssl/main.bro) rest_target(${psd} base/protocols/ssl/mozilla-ca-list.bro) rest_target(${psd} base/protocols/syslog/consts.bro) rest_target(${psd} base/protocols/syslog/main.bro) +rest_target(${psd} base/utils/active-http.bro) rest_target(${psd} base/utils/addrs.bro) rest_target(${psd} base/utils/conn-ids.bro) +rest_target(${psd} base/utils/dir.bro) rest_target(${psd} base/utils/directions-and-hosts.bro) +rest_target(${psd} base/utils/exec.bro) rest_target(${psd} base/utils/files.bro) rest_target(${psd} base/utils/numbers.bro) rest_target(${psd} base/utils/paths.bro) diff --git a/scripts/base/init-default.bro b/scripts/base/init-default.bro index 72ba0bf115..6e348cfffd 100644 --- a/scripts/base/init-default.bro +++ b/scripts/base/init-default.bro @@ -5,9 +5,12 @@ ##! you actually want. @load base/utils/site +@load base/utils/active-http @load base/utils/addrs @load base/utils/conn-ids +@load base/utils/dir @load base/utils/directions-and-hosts +@load base/utils/exec @load base/utils/files @load base/utils/numbers @load base/utils/paths diff --git a/scripts/base/utils/active-http.bro b/scripts/base/utils/active-http.bro new file mode 100644 index 0000000000..eb9a212221 --- /dev/null +++ b/scripts/base/utils/active-http.bro @@ -0,0 +1,123 @@ +##! A module for performing active HTTP requests and +##! getting the reply at runtime. + +@load ./exec + +module ActiveHTTP; + +export { + ## The default timeout for HTTP requests. + const default_max_time = 1min &redef; + + ## The default HTTP method/verb to use for requests. 
+ const default_method = "GET" &redef; + + type Response: record { + ## Numeric response code from the server. + code: count; + ## String response message from the server. + msg: string; + ## Full body of the response. + body: string &optional; + ## All headers returned by the server. + headers: table[string] of string &optional; + }; + + type Request: record { + ## The URL being requested. + url: string; + ## The HTTP method/verb to use for the request. + method: string &default=default_method; + ## Data to send to the server in the client body. Keep in + ## mind that you will probably need to set the *method* field + ## to "POST" or "PUT". + client_data: string &optional; + ## Arbitrary headers to pass to the server. Some headers + ## will be included by libCurl. + #custom_headers: table[string] of string &optional; + ## Timeout for the request. + max_time: interval &default=default_max_time; + ## Additional curl command line arguments. Be very careful + ## with this option since shell injection could take place + ## if careful handling of untrusted data is not applied. + addl_curl_args: string &optional; + }; + + ## Perform an HTTP request according to the :bro:type:`Request` record. + ## This is an asynchronous function and must be called within a "when" + ## statement. + ## + ## req: A record instance representing all options for an HTTP request. + ## + ## Returns: A record with the full response message. 
+ global request: function(req: ActiveHTTP::Request): ActiveHTTP::Response; +} + +function request2curl(r: Request, bodyfile: string, headersfile: string): string + { + local cmd = fmt("curl -s -g -o \"%s\" -D \"%s\" -X \"%s\"", + str_shell_escape(bodyfile), + str_shell_escape(headersfile), + str_shell_escape(r$method)); + + cmd = fmt("%s -m %.0f", cmd, r$max_time); + + if ( r?$client_data ) + cmd = fmt("%s -d -", cmd); + + if ( r?$addl_curl_args ) + cmd = fmt("%s %s", cmd, r$addl_curl_args); + + cmd = fmt("%s \"%s\"", cmd, str_shell_escape(r$url)); + return cmd; + } + +function request(req: Request): ActiveHTTP::Response + { + local tmpfile = "/tmp/bro-activehttp-" + unique_id(""); + local bodyfile = fmt("%s_body", tmpfile); + local headersfile = fmt("%s_headers", tmpfile); + + local cmd = request2curl(req, bodyfile, headersfile); + local stdin_data = req?$client_data ? req$client_data : ""; + + local resp: Response; + resp$code = 0; + resp$msg = ""; + resp$body = ""; + resp$headers = table(); + return when ( local result = Exec::run([$cmd=cmd, $stdin=stdin_data, $read_files=set(bodyfile, headersfile)]) ) + { + # If there is no response line then nothing else will work either. + if ( ! (result?$files && headersfile in result$files) ) + { + Reporter::error(fmt("There was a failure when requesting \"%s\" with ActiveHTTP.", req$url)); + return resp; + } + + local headers = result$files[headersfile]; + for ( i in headers ) + { + # The reply is the first line. 
+			if ( i == 0 )
+				{
+				local response_line = split_n(headers[0], /[[:blank:]]+/, F, 2);
+				if ( |response_line| != 3 )
+					return resp;
+
+				resp$code = to_count(response_line[2]);
+				resp$msg = response_line[3];
+				resp$body = ( bodyfile in result$files ) ? join_string_vec(result$files[bodyfile], "") : "";	# An empty body file yields no lines, hence no table entry to index.
+				}
+			else
+				{
+				local line = headers[i];
+				local h = split1(line, /:/);
+				if ( |h| != 2 )
+					next;
+				resp$headers[h[1]] = sub_bytes(h[2], 0, |h[2]|-1);
+				}
+			}
+		return resp;
+		}
+	}
diff --git a/scripts/base/utils/dir.bro b/scripts/base/utils/dir.bro
new file mode 100644
index 0000000000..4f3ee94945
--- /dev/null
+++ b/scripts/base/utils/dir.bro
@@ -0,0 +1,66 @@
+@load base/utils/exec
+@load base/frameworks/reporter
+@load base/utils/paths
+
+module Dir;
+
+export {
+	## The default interval this module checks for files in directories when
+	## using the :bro:see:`Dir::monitor` function.
+	const polling_interval = 30sec &redef;
+
+	## Register a directory to monitor with a callback that is called
+	## every time a previously unseen file is seen. If a file is deleted
+	## and seen to be gone, the file is available for being seen again in
+	## the future.
+	##
+	## dir: The directory to monitor for files.
+	##
+	## callback: Callback that gets executed with each file name
+	## that is found. Filenames are provided with the full path.
+	##
+	## poll_interval: An interval at which to check for new files.
+	global monitor: function(dir: string, callback: function(fname: string),
+	                         poll_interval: interval &default=polling_interval);
+}
+
+event Dir::monitor_ev(dir: string, last_files: set[string],
+                      callback: function(fname: string),
+                      poll_interval: interval)
+	{
+	when ( local result = Exec::run([$cmd=fmt("ls -i \"%s/\"", str_shell_escape(dir))]) )
+		{
+		if ( result$exit_code != 0 )
+			{
+			Reporter::warning(fmt("Requested monitoring of non-existent directory (%s).", dir));
+			return;
+			}
+
+		local current_files: set[string] = set();
+		local files: vector of string = vector();
+
+		if ( result?$stdout )
+			files = result$stdout;
+
+		for ( i in files )
+			{
+			local parts = split1(strip(files[i]), / /);	# ls -i may left-pad inode numbers; strip so parts[1] is the inode, parts[2] the name.
+			if ( parts[1] !in last_files )
+				callback(build_path_compressed(dir, parts[2]));
+			add current_files[parts[1]];
+			}
+
+		schedule poll_interval
+			{
+			Dir::monitor_ev(dir, current_files, callback, poll_interval)
+			};
+		}
+	}
+
+function monitor(dir: string, callback: function(fname: string),
+                 poll_interval: interval &default=polling_interval)
+	{
+	event Dir::monitor_ev(dir, set(), callback, poll_interval);
+	}
+
+
diff --git a/scripts/base/utils/exec.bro b/scripts/base/utils/exec.bro
new file mode 100644
index 0000000000..732bbcf34c
--- /dev/null
+++ b/scripts/base/utils/exec.bro
@@ -0,0 +1,185 @@
+##! A module for executing external command line programs.
+
+@load base/frameworks/input
+
+module Exec;
+
+export {
+	type Command: record {
+		## The command line to execute. Use care to avoid injection attacks.
+		## I.e. if the command uses untrusted/variable data, sanitize
+		## it with str_shell_escape().
+		cmd: string;
+		## Provide standard in to the program as a string.
+		stdin: string &default="";
+		## If additional files are required to be read in as part of the output
+		## of the command they can be defined here.
+		read_files: set[string] &optional;
+		# The unique id for tracking executors.
+ uid: string &default=unique_id(""); + }; + + type Result: record { + ## Exit code from the program. + exit_code: count &default=0; + ## True if the command was terminated with a signal. + signal_exit: bool &default=F; + ## Each line of standard out. + stdout: vector of string &optional; + ## Each line of standard error. + stderr: vector of string &optional; + ## If additional files were requested to be read in + ## the content of the files will be available here. + files: table[string] of string_vec &optional; + }; + + ## Function for running command line programs and getting + ## output. This is an asynchronous function which is meant + ## to be run with the `when` statement. + ## + ## cmd: The command to run. Use care to avoid injection attacks! + ## + ## returns: A record representing the full results from the + ## external program execution. + global run: function(cmd: Command): Result; + + ## The system directory for temp files. + const tmp_dir = "/tmp" &redef; +} + +# Indexed by command uid. +global results: table[string] of Result; +global pending_commands: set[string]; +global pending_files: table[string] of set[string]; + +type OneLine: record { + s: string; + is_stderr: bool; +}; + +type FileLine: record { + s: string; +}; + +event Exec::line(description: Input::EventDescription, tpe: Input::Event, s: string, is_stderr: bool) + { + local result = results[description$name]; + if ( is_stderr ) + { + if ( ! result?$stderr ) + result$stderr = vector(s); + else + result$stderr[|result$stderr|] = s; + } + else + { + if ( ! result?$stdout ) + result$stdout = vector(s); + else + result$stdout[|result$stdout|] = s; + } + } + +event Exec::file_line(description: Input::EventDescription, tpe: Input::Event, s: string) + { + local parts = split1(description$name, /_/); + local name = parts[1]; + local track_file = parts[2]; + + local result = results[name]; + if ( ! 
result?$files )
+		result$files = table();
+
+	if ( track_file !in result$files )
+		result$files[track_file] = vector(s);
+	else
+		result$files[track_file][|result$files[track_file]|] = s;
+	}
+
+event Input::end_of_data(name: string, source:string)
+	{
+	local parts = split1(name, /_/);
+	local uid = parts[1];	# Command uid; "name" must keep the full "<uid>_<file>" stream name.
+
+	if ( uid !in pending_commands || |parts| < 2 )
+		return;
+
+	local track_file = parts[2];
+
+	Input::remove(name);	# Remove this file's own stream; the command's stream was already removed in process_finished.
+
+	if ( uid !in pending_files )
+		delete pending_commands[uid];
+	else
+		{
+		delete pending_files[uid][track_file];
+		if ( |pending_files[uid]| == 0 )
+			delete pending_commands[uid];
+		system(fmt("rm \"%s\"", str_shell_escape(track_file)));
+		}
+	}
+
+event InputRaw::process_finished(name: string, source:string, exit_code:count, signal_exit:bool)
+	{
+	if ( name !in pending_commands )
+		return;
+
+	Input::remove(name);
+	results[name]$exit_code = exit_code;
+	results[name]$signal_exit = signal_exit;
+
+	if ( name !in pending_files || |pending_files[name]| == 0 )
+		# No extra files to read, command is done.
+ delete pending_commands[name]; + else + for ( read_file in pending_files[name] ) + Input::add_event([$source=fmt("%s", read_file), + $name=fmt("%s_%s", name, read_file), + $reader=Input::READER_RAW, + $want_record=F, + $fields=FileLine, + $ev=Exec::file_line]); + } + +function run(cmd: Command): Result + { + add pending_commands[cmd$uid]; + results[cmd$uid] = []; + + if ( cmd?$read_files ) + { + for ( read_file in cmd$read_files ) + { + if ( cmd$uid !in pending_files ) + pending_files[cmd$uid] = set(); + add pending_files[cmd$uid][read_file]; + } + } + + local config_strings: table[string] of string = { + ["stdin"] = cmd$stdin, + ["read_stderr"] = "1", + }; + Input::add_event([$name=cmd$uid, + $source=fmt("%s |", cmd$cmd), + $reader=Input::READER_RAW, + $fields=Exec::OneLine, + $ev=Exec::line, + $want_record=F, + $config=config_strings]); + + return when ( cmd$uid !in pending_commands ) + { + local result = results[cmd$uid]; + delete results[cmd$uid]; + return result; + } + } + +event bro_done() + { + # We are punting here and just deleting any unprocessed files. 
+ for ( uid in pending_files ) + for ( fname in pending_files[uid] ) + system(fmt("rm \"%s\"", str_shell_escape(fname))); + } diff --git a/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log b/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log index e28efc9563..e65b72a30b 100644 --- a/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log +++ b/testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log @@ -3,7 +3,7 @@ #empty_field (empty) #unset_field - #path loaded_scripts -#open 2013-07-29-21-31-47 +#open 2013-07-29-22-37-52 #fields name #types string scripts/base/init-bare.bro @@ -93,4 +93,4 @@ scripts/base/init-bare.bro build/scripts/base/bif/__load__.bro scripts/policy/misc/loaded-scripts.bro scripts/base/utils/paths.bro -#close 2013-07-29-21-31-47 +#close 2013-07-29-22-37-52 diff --git a/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log b/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log index faf372222b..dbbf689185 100644 --- a/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log +++ b/testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log @@ -3,7 +3,7 @@ #empty_field (empty) #unset_field - #path loaded_scripts -#open 2013-07-29-21-31-48 +#open 2013-07-29-22-37-53 #fields name #types string scripts/base/init-bare.bro @@ -92,12 +92,17 @@ scripts/base/init-bare.bro scripts/base/utils/patterns.bro build/scripts/base/bif/__load__.bro scripts/base/init-default.bro + scripts/base/utils/active-http.bro + scripts/base/utils/exec.bro scripts/base/utils/addrs.bro scripts/base/utils/conn-ids.bro + scripts/base/utils/dir.bro + scripts/base/frameworks/reporter/__load__.bro + scripts/base/frameworks/reporter/main.bro + scripts/base/utils/paths.bro scripts/base/utils/directions-and-hosts.bro scripts/base/utils/files.bro 
scripts/base/utils/numbers.bro - scripts/base/utils/paths.bro scripts/base/utils/queue.bro scripts/base/utils/strings.bro scripts/base/utils/thresholds.bro @@ -131,8 +136,6 @@ scripts/base/init-default.bro scripts/base/frameworks/intel/__load__.bro scripts/base/frameworks/intel/main.bro scripts/base/frameworks/intel/input.bro - scripts/base/frameworks/reporter/__load__.bro - scripts/base/frameworks/reporter/main.bro scripts/base/frameworks/sumstats/__load__.bro scripts/base/frameworks/sumstats/main.bro scripts/base/frameworks/sumstats/plugins/__load__.bro @@ -198,4 +201,4 @@ scripts/base/init-default.bro scripts/base/files/extract/main.bro scripts/base/misc/find-checksum-offloading.bro scripts/policy/misc/loaded-scripts.bro -#close 2013-07-29-21-31-48 +#close 2013-07-29-22-37-53 diff --git a/testing/btest/Baseline/scripts.base.utils.active-http/bro..stdout b/testing/btest/Baseline/scripts.base.utils.active-http/bro..stdout new file mode 100644 index 0000000000..0284eb19b3 --- /dev/null +++ b/testing/btest/Baseline/scripts.base.utils.active-http/bro..stdout @@ -0,0 +1,5 @@ +[code=200, msg=OK^M, body=It works!, headers={ +[Server] = 1.0, +[Content-type] = text/plain, +[Date] = July 22, 2013 +}] diff --git a/testing/btest/Baseline/scripts.base.utils.dir/bro..stdout b/testing/btest/Baseline/scripts.base.utils.dir/bro..stdout new file mode 100644 index 0000000000..c3103b7f64 --- /dev/null +++ b/testing/btest/Baseline/scripts.base.utils.dir/bro..stdout @@ -0,0 +1,10 @@ +new_file1, ../testdir/bye +new_file1, ../testdir/hi +new_file1, ../testdir/howsitgoing +new_file2, ../testdir/bye +new_file2, ../testdir/hi +new_file2, ../testdir/howsitgoing +new_file1, ../testdir/bye +new_file1, ../testdir/newone +new_file2, ../testdir/bye +new_file2, ../testdir/newone diff --git a/testing/btest/Baseline/scripts.base.utils.exec/bro..stdout b/testing/btest/Baseline/scripts.base.utils.exec/bro..stdout new file mode 100644 index 0000000000..5352d15d18 --- /dev/null +++ 
b/testing/btest/Baseline/scripts.base.utils.exec/bro..stdout
@@ -0,0 +1,7 @@
+test1, [exit_code=0, signal_exit=F, stdout=[done, exit, stop], stderr=, files={
+[out1] = [insert text here, and here],
+[out2] = [insert more text here, and there]
+}]
+test2, [exit_code=1, signal_exit=F, stdout=[here's something on stdout, some more stdout, last stdout], stderr=[and some stderr, more stderr, last stderr], files=]
+test3, [exit_code=9, signal_exit=F, stdout=[FML], stderr=, files=]
+test4, [exit_code=0, signal_exit=F, stdout=[hibye], stderr=, files=]
diff --git a/testing/btest/scripts/base/utils/active-http.test b/testing/btest/scripts/base/utils/active-http.test
new file mode 100644
index 0000000000..127b21d77e
--- /dev/null
+++ b/testing/btest/scripts/base/utils/active-http.test
@@ -0,0 +1,28 @@
+# @TEST-REQUIRES: which curl
+# @TEST-REQUIRES: which python
+# (curl is what ActiveHTTP shells out to; the test server below is run with python.)
+# @TEST-EXEC: btest-bg-run httpd python $SCRIPTS/httpd.py --max 1
+# @TEST-EXEC: sleep 3
+# @TEST-EXEC: btest-bg-run bro bro -b %INPUT
+# @TEST-EXEC: btest-bg-wait 15
+# @TEST-EXEC: btest-diff bro/.stdout
+
+@load base/utils/active-http
+
+redef exit_only_after_terminate = T;
+
+event bro_init()
+	{
+	local req = ActiveHTTP::Request($url="localhost:32123");
+
+	when ( local resp = ActiveHTTP::request(req) )
+		{
+		print resp;
+		terminate();
+		}
+	timeout 1min
+		{
+		print "HTTP request timeout";
+		terminate();
+		}
+	}
diff --git a/testing/btest/scripts/base/utils/dir.test b/testing/btest/scripts/base/utils/dir.test
new file mode 100644
index 0000000000..44fee3860f
--- /dev/null
+++ b/testing/btest/scripts/base/utils/dir.test
@@ -0,0 +1,58 @@
+# @TEST-EXEC: btest-bg-run bro bro -b ../dirtest.bro
+# @TEST-EXEC: btest-bg-wait 10
+# @TEST-EXEC: TEST_DIFF_CANONIFIER=$SCRIPTS/diff-sort btest-diff bro/.stdout
+
+@TEST-START-FILE dirtest.bro
+
+@load base/utils/dir
+
+redef exit_only_after_terminate = T;
+
+global c: count = 0;
+
+function check_terminate_condition()
+	{
+	c += 1;
+
+	if ( c == 10 )
+		terminate();
+	}
+
+function new_file1(fname: string) + { + print "new_file1", fname; + check_terminate_condition(); + } + +function new_file2(fname: string) + { + print "new_file2", fname; + check_terminate_condition(); + } + +event change_things() + { + system("touch ../testdir/newone"); + system("rm ../testdir/bye && touch ../testdir/bye"); + } + +event bro_init() + { + Dir::monitor("../testdir", new_file1, .5sec); + Dir::monitor("../testdir", new_file2, 1sec); + schedule 1sec { change_things() }; + } + +@TEST-END-FILE + +@TEST-START-FILE testdir/hi +123 +@TEST-END-FILE + +@TEST-START-FILE testdir/howsitgoing +abc +@TEST-END-FILE + +@TEST-START-FILE testdir/bye +!@# +@TEST-END-FILE diff --git a/testing/btest/scripts/base/utils/exec.test b/testing/btest/scripts/base/utils/exec.test new file mode 100644 index 0000000000..8876f0f49b --- /dev/null +++ b/testing/btest/scripts/base/utils/exec.test @@ -0,0 +1,74 @@ +# @TEST-EXEC: btest-bg-run bro bro -b ../exectest.bro +# @TEST-EXEC: btest-bg-wait 10 +# @TEST-EXEC: TEST_DIFF_CANONIFIER=$SCRIPTS/diff-sort btest-diff bro/.stdout + +@TEST-START-FILE exectest.bro + +@load base/utils/exec + +redef exit_only_after_terminate = T; + +global c: count = 0; + +function check_exit_condition() + { + c += 1; + + if ( c == 4 ) + terminate(); + } + +function test_cmd(label: string, cmd: Exec::Command) + { + when ( local result = Exec::run(cmd) ) + { + print label, result; + check_exit_condition(); + } + } + +event bro_init() + { + test_cmd("test1", [$cmd="bash ../somescript.sh", + $read_files=set("out1", "out2")]); + test_cmd("test2", [$cmd="bash ../nofiles.sh"]); + test_cmd("test3", [$cmd="bash ../suicide.sh"]); + test_cmd("test4", [$cmd="bash ../stdin.sh", $stdin="hibye"]); + } + +@TEST-END-FILE + +@TEST-START-FILE somescript.sh +#! 
/usr/bin/env bash
+echo "insert text here" > out1
+echo "and here" >> out1
+echo "insert more text here" > out2
+echo "and there" >> out2
+echo "done"
+echo "exit"
+echo "stop"
+@TEST-END-FILE
+
+@TEST-START-FILE nofiles.sh
+#! /usr/bin/env bash
+echo "here's something on stdout"
+echo "some more stdout"
+echo "last stdout"
+echo "and some stderr" 1>&2
+echo "more stderr" 1>&2
+echo "last stderr" 1>&2
+exit 1
+@TEST-END-FILE
+
+@TEST-START-FILE suicide.sh
+#! /usr/bin/env bash
+echo "FML"
+kill -9 $$
+echo "nope"
+@TEST-END-FILE
+
+@TEST-START-FILE stdin.sh
+#! /usr/bin/env bash
+read -r line
+echo "$line"
+@TEST-END-FILE
diff --git a/testing/scripts/httpd.py b/testing/scripts/httpd.py
new file mode 100755
index 0000000000..0732614bc2
--- /dev/null
+++ b/testing/scripts/httpd.py
@@ -0,0 +1,40 @@
+#! /usr/bin/env python
+
+import BaseHTTPServer
+
+class MyRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
+
+    def do_GET(self):
+        self.send_response(200)
+        self.send_header("Content-type", "text/plain")
+        self.end_headers()
+        self.wfile.write("It works!")
+
+    def version_string(self):
+        return "1.0"
+
+    def date_time_string(self):
+        return "July 22, 2013"
+
+
+if __name__ == "__main__":
+    from optparse import OptionParser
+    p = OptionParser()
+    p.add_option("-a", "--addr", type="string", default="localhost",
+                 help=("listen on given address (numeric IP or host name), "
+                       "an empty string means INADDR_ANY (default: localhost)"))
+    p.add_option("-p", "--port", type="int", default=32123,
+                 help="listen on given TCP port number")
+    p.add_option("-m", "--max", type="int", default=-1,
+                 help="max number of requests to respond to, -1 means no max")
+    options, args = p.parse_args()
+
+    httpd = BaseHTTPServer.HTTPServer((options.addr, options.port),
+                                      MyRequestHandler)
+    if options.max == -1:
+        httpd.serve_forever()
+    else:
+        served_count = 0
+        while served_count != options.max:
+            httpd.handle_request()
+            served_count += 1