diff --git a/scripts/base/frameworks/file-analysis/main.bro b/scripts/base/frameworks/file-analysis/main.bro index 3133ab43b9..68246761cf 100644 --- a/scripts/base/frameworks/file-analysis/main.bro +++ b/scripts/base/frameworks/file-analysis/main.bro @@ -51,8 +51,10 @@ export { ## from a container file as part of the analysis. parent_file_id: string &log &optional; - ## The network protocol over which the file was transferred. - protocol: string &log &optional; + ## An identification of the source of the file data. E.g. it may be + ## a network protocol over which it was transferred, or a local file + ## path which was read, or some other input source. + source: string &log &optional; ## The set of connections over which the file was transferred, ## indicated by UID strings. diff --git a/scripts/base/frameworks/input/__load__.bro b/scripts/base/frameworks/input/__load__.bro index 0e7d8ffb73..53b2be8364 100644 --- a/scripts/base/frameworks/input/__load__.bro +++ b/scripts/base/frameworks/input/__load__.bro @@ -2,4 +2,5 @@ @load ./readers/ascii @load ./readers/raw @load ./readers/benchmark +@load ./readers/binary diff --git a/scripts/base/frameworks/input/readers/binary.bro b/scripts/base/frameworks/input/readers/binary.bro new file mode 100644 index 0000000000..abd7944ff2 --- /dev/null +++ b/scripts/base/frameworks/input/readers/binary.bro @@ -0,0 +1,8 @@ +##! Interface for the binary input reader. + +module InputBinary; + +export { + ## Size of data chunks to read from the input file at a time. 
+	const chunk_size = 1024 &redef;
+}
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index dbabaebff2..e60a68a5a3 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -447,6 +447,7 @@ set(bro_SRCS
     input/readers/Ascii.cc
     input/readers/Raw.cc
     input/readers/Benchmark.cc
+    input/readers/Binary.cc
 
     file_analysis/Manager.cc
     file_analysis/Info.cc
diff --git a/src/file_analysis.bif b/src/file_analysis.bif
index abe7dbbd3c..91c235edfe 100644
--- a/src/file_analysis.bif
+++ b/src/file_analysis.bif
@@ -93,3 +93,78 @@ function FileAnalysis::stop%(file_id: string%): bool
 	bool result = file_mgr->IgnoreFile(FileID(file_id->CheckString()));
 	return new Val(result, TYPE_BOOL);
 	%}
+
+## Passes the next sequential chunk of a file's data to the file analysis
+## framework.
+##
+## source: identifies the origin of the data; all calls that use the same
+##         value refer to the same file.
+##
+## data: the file data.
+function FileAnalysis::input_data%(source: string, data: string%): any
+	%{
+	string s = source->CheckString();
+	string unique = "BIF " + s;
+	file_mgr->DataIn(unique, data->Bytes(), data->Len(), 0, s);
+	return 0;
+	%}
+
+## Passes a chunk of a file's data, positioned at a given offset, to the
+## file analysis framework.
+##
+## source: identifies the origin of the data; all calls that use the same
+##         value refer to the same file.
+##
+## data: the file data.
+##
+## offset: the offset of *data* within the file.
+function FileAnalysis::input_data_chunk%(source: string, data: string,
+                                         offset: count%): any
+	%{
+	string s = source->CheckString();
+	string unique = "BIF " + s;
+	file_mgr->DataIn(unique, data->Bytes(), data->Len(), offset, 0, s);
+	return 0;
+	%}
+
+## Signals a gap in a file's data stream.
+##
+## source: identifies the origin of the data; all calls that use the same
+##         value refer to the same file.
+##
+## offset: the offset of the gap within the file.
+##
+## len: the length of the gap.
+function FileAnalysis::gap%(source: string, offset: count, len: count%): any
+	%{
+	string s = source->CheckString();
+	string unique = "BIF " + s;
+	file_mgr->Gap(unique, offset, len, 0, s);
+	return 0;
+	%}
+
+## Provides the expected number of bytes that comprise a file.
+##
+## source: identifies the origin of the data; all calls that use the same
+##         value refer to the same file.
+##
+## size: the expected size of the file in bytes.
+function FileAnalysis::set_size%(source: string, size: count%): any
+	%{
+	string s = source->CheckString();
+	string unique = "BIF " + s;
+	file_mgr->SetSize(unique, size, 0, s);
+	return 0;
+	%}
+
+## Signals the end of a file's data.
+##
+## source: identifies the origin of the data; all calls that use the same
+##         value refer to the same file.
+function FileAnalysis::input_eof%(source: string%): any
+	%{
+	string s = source->CheckString();
+	string unique = "BIF " + s;
+	file_mgr->EndOfFile(unique, 0, s);
+	return 0;
+	%}
diff --git a/src/file_analysis/Info.cc b/src/file_analysis/Info.cc
index e6ab66135f..0d74ae5d99 100644
--- a/src/file_analysis/Info.cc
+++ b/src/file_analysis/Info.cc
@@ -37,7 +37,7 @@ static RecordVal* get_conn_id_val(const Connection* conn)
int Info::file_id_idx = -1; int Info::parent_file_id_idx = -1; -int Info::protocol_idx = -1; +int Info::source_idx = -1; int Info::conn_uids_idx = -1; int Info::conn_ids_idx = -1; int Info::seen_bytes_idx = -1; @@ -59,7 +59,7 @@ void Info::InitFieldIndices() if ( file_id_idx != -1 ) return; file_id_idx = Idx("file_id"); parent_file_id_idx = Idx("parent_file_id"); - protocol_idx = Idx("protocol"); + source_idx = Idx("source"); conn_uids_idx = Idx("conn_uids"); conn_ids_idx = Idx("conn_ids"); seen_bytes_idx = Idx("seen_bytes"); @@ -89,7 +89,7 @@ static void init_magic(magic_t* magic, int flags) } } -Info::Info(const string& unique, Connection* conn, const string& protocol) +Info::Info(const string& unique, Connection* conn, const string& source) : file_id(unique), unique(unique), val(0), last_activity_time(network_time), postpone_timeout(false), need_reassembly(false), done(false), actions(this) @@ -113,8 +113,8 @@ Info::Info(const string& unique, Connection* conn, const string& protocol) UpdateConnectionFields(conn); - if ( protocol != "" ) - val->Assign(protocol_idx, new StringVal(protocol.c_str())); + if ( ! source.empty() ) + val->Assign(source_idx, new StringVal(source.c_str())); } Info::~Info() diff --git a/src/file_analysis/Info.h b/src/file_analysis/Info.h index f6dd1d3b46..a02262c99d 100644 --- a/src/file_analysis/Info.h +++ b/src/file_analysis/Info.h @@ -117,8 +117,7 @@ protected: /** * Constructor; only file_analysis::Manager should be creating these. 
*/ - Info(const string& unique, Connection* conn = 0, - const string& protocol = ""); + Info(const string& unique, Connection* conn = 0, const string& source = ""); /** * Updates the "conn_ids" and "conn_uids" fields in #val record with the @@ -190,7 +189,7 @@ protected: public: static int file_id_idx; static int parent_file_id_idx; - static int protocol_idx; + static int source_idx; static int conn_uids_idx; static int conn_ids_idx; static int seen_bytes_idx; diff --git a/src/file_analysis/Manager.cc b/src/file_analysis/Manager.cc index fa46f4b04c..85dbe8ff39 100644 --- a/src/file_analysis/Manager.cc +++ b/src/file_analysis/Manager.cc @@ -26,11 +26,11 @@ void Manager::Terminate() } void Manager::DataIn(const string& unique, const u_char* data, uint64 len, - uint64 offset, Connection* conn, const string& protocol) + uint64 offset, Connection* conn, const string& source) { if ( IsIgnored(unique) ) return; - Info* info = GetInfo(unique, conn, protocol); + Info* info = GetInfo(unique, conn, source); if ( ! info ) return; @@ -41,9 +41,9 @@ void Manager::DataIn(const string& unique, const u_char* data, uint64 len, } void Manager::DataIn(const string& unique, const u_char* data, uint64 len, - Connection* conn, const string& protocol) + Connection* conn, const string& source) { - Info* info = GetInfo(unique, conn, protocol); + Info* info = GetInfo(unique, conn, source); if ( ! info ) return; @@ -54,18 +54,18 @@ void Manager::DataIn(const string& unique, const u_char* data, uint64 len, } void Manager::EndOfFile(const string& unique, Connection* conn, - const string& protocol) + const string& source) { - // Just call GetInfo because maybe the conn/protocol args will update + // Just call GetInfo because maybe the conn/source args will update // something in the Info record. 
- GetInfo(unique, conn, protocol); + GetInfo(unique, conn, source); RemoveFile(unique); } void Manager::Gap(const string& unique, uint64 offset, uint64 len, - Connection* conn, const string& protocol) + Connection* conn, const string& source) { - Info* info = GetInfo(unique, conn, protocol); + Info* info = GetInfo(unique, conn, source); if ( ! info ) return; @@ -73,9 +73,9 @@ void Manager::Gap(const string& unique, uint64 offset, uint64 len, } void Manager::SetSize(const string& unique, uint64 size, - Connection* conn, const string& protocol) + Connection* conn, const string& source) { - Info* info = GetInfo(unique, conn, protocol); + Info* info = GetInfo(unique, conn, source); if ( ! info ) return; @@ -132,7 +132,7 @@ bool Manager::RemoveAction(const FileID& file_id, const RecordVal* args) const } Info* Manager::GetInfo(const string& unique, Connection* conn, - const string& protocol) + const string& source) { if ( IsIgnored(unique) ) return 0; @@ -140,7 +140,7 @@ Info* Manager::GetInfo(const string& unique, Connection* conn, if ( ! rval ) { - rval = str_map[unique] = new Info(unique, conn, protocol); + rval = str_map[unique] = new Info(unique, conn, source); FileID id = rval->GetFileID(); if ( id_map[id] ) diff --git a/src/file_analysis/Manager.h b/src/file_analysis/Manager.h index 257060f406..f17bdef540 100644 --- a/src/file_analysis/Manager.h +++ b/src/file_analysis/Manager.h @@ -35,31 +35,31 @@ public: */ void DataIn(const string& unique, const u_char* data, uint64 len, uint64 offset, Connection* conn = 0, - const string& protocol = ""); + const string& source = ""); /** * Pass in sequential file data. */ void DataIn(const string& unique, const u_char* data, uint64 len, - Connection* conn = 0, const string& protocol = ""); + Connection* conn = 0, const string& source = ""); /** * Signal the end of file data. 
*/ void EndOfFile(const string& unique, Connection* conn = 0, - const string& protocol = ""); + const string& source = ""); /** * Signal a gap in the file data stream. */ void Gap(const string& unique, uint64 offset, uint64 len, - Connection* conn = 0, const string& protocol = ""); + Connection* conn = 0, const string& source = ""); /** * Provide the expected number of bytes that comprise a file. */ void SetSize(const string& unique, uint64 size, Connection* conn = 0, - const string& protocol = ""); + const string& source = ""); /** * Starts ignoring a file, which will finally be removed from internal @@ -109,7 +109,7 @@ protected: * record value may be updated. */ Info* GetInfo(const string& unique, Connection* conn = 0, - const string& protocol = ""); + const string& source = ""); /** * @return the Info object mapped to \a file_id, or a null pointer if no diff --git a/src/input.bif b/src/input.bif index 199b665fa6..d7e4de5463 100644 --- a/src/input.bif +++ b/src/input.bif @@ -57,3 +57,6 @@ const autospread: double; const addfactor: count; const stopspreadat: count; const timedspread: double; + +module InputBinary; +const chunk_size: count; diff --git a/src/input/Manager.cc b/src/input/Manager.cc index d9006d66a2..4b843cdc35 100644 --- a/src/input/Manager.cc +++ b/src/input/Manager.cc @@ -8,6 +8,7 @@ #include "readers/Ascii.h" #include "readers/Raw.h" #include "readers/Benchmark.h" +#include "readers/Binary.h" #include "Event.h" #include "EventHandler.h" @@ -34,6 +35,7 @@ ReaderDefinition input_readers[] = { { BifEnum::Input::READER_ASCII, "Ascii", 0, reader::Ascii::Instantiate }, { BifEnum::Input::READER_RAW, "Raw", 0, reader::Raw::Instantiate }, { BifEnum::Input::READER_BENCHMARK, "Benchmark", 0, reader::Benchmark::Instantiate }, + { BifEnum::Input::READER_BINARY, "Binary", 0, reader::Binary::Instantiate }, // End marker { BifEnum::Input::READER_DEFAULT, "None", 0, (ReaderBackend* (*)(ReaderFrontend* frontend))0 } diff --git a/src/input/readers/Binary.cc 
b/src/input/readers/Binary.cc
new file mode 100644
index 0000000000..da86753303
--- /dev/null
+++ b/src/input/readers/Binary.cc
@@ -0,0 +1,293 @@
+// See the file "COPYING" in the main distribution directory for copyright.
+
+#include <sys/stat.h>
+
+#include "Binary.h"
+#include "NetVar.h"
+
+#include "../../threading/SerialTypes.h"
+
+using namespace input::reader;
+using threading::Value;
+using threading::Field;
+
+streamsize Binary::chunk_size = 0;
+
+Binary::Binary(ReaderFrontend *frontend)
+	: ReaderBackend(frontend), in(0), mtime(0), firstrun(true)
+	{
+	// chunk_size is shared by all Binary readers: take it once from the
+	// script-level constant, falling back to 1024 if that is zero.
+	if ( ! chunk_size )
+		{
+		chunk_size = BifConst::InputBinary::chunk_size;
+		if ( ! chunk_size )
+			chunk_size = 1024;
+		}
+	}
+
+Binary::~Binary()
+	{
+	DoClose();
+	}
+
+void Binary::DoClose()
+	{
+	if ( in )
+		CloseInput();
+	}
+
+// Opens fname in binary mode. On failure, reports an error, leaves "in"
+// null and returns false.
+bool Binary::OpenInput()
+	{
+	in = new ifstream(fname.c_str(), ios_base::in | ios_base::binary);
+
+	if ( in->fail() )
+		{
+		Error(Fmt("Init: cannot open %s", fname.c_str()));
+
+		// Don't keep a stream that never opened: DoClose() would pass it
+		// to CloseInput(), which reports a non-open stream as an internal
+		// error.
+		delete in;
+		in = 0;
+		return false;
+		}
+
+	return true;
+	}
+
+// Closes and frees the input stream. Calling this without an open stream
+// is reported as an internal error.
+bool Binary::CloseInput()
+	{
+	if ( ! in || ! in->is_open() )
+		{
+		InternalError(Fmt("Trying to close closed file for stream %s",
+		                  fname.c_str()));
+		return false;
+		}
+
+#ifdef DEBUG
+	Debug(DBG_INPUT, "Binary reader starting close");
+#endif
+
+	in->close();
+	delete in;
+	in = 0;
+
+#ifdef DEBUG
+	Debug(DBG_INPUT, "Binary reader finished close");
+#endif
+
+	return true;
+	}
+
+// Checks that the stream has a source path and exactly one string field,
+// then opens the file and performs the first update.
+bool Binary::DoInit(const ReaderInfo& info, int num_fields,
+                    const Field* const* fields)
+	{
+	in = 0;
+	mtime = 0;
+	firstrun = true;
+
+	if ( ! info.source || strlen(info.source) == 0 )
+		{
+		Error("No source path provided");
+		return false;
+		}
+
+	if ( num_fields != 1 )
+		{
+		Error("Filter for binary reader contains more than one field. Filters "
+		      "for binary reader must contain exactly one string field. "
+		      "Filter ignored.");
+		return false;
+		}
+
+	if ( fields[0]->type != TYPE_STRING )
+		{
+		Error("Filter for binary reader contains a non-string field.");
+		return false;
+		}
+
+	// do Initialization
+	fname = info.source;
+
+	if ( ! OpenInput() ) return false;
+
+	if ( UpdateModificationTime() == -1 ) return false;
+
+#ifdef DEBUG
+	Debug(DBG_INPUT, "Binary reader created, will perform first update");
+#endif
+
+	// after initialization - do update
+	DoUpdate();
+
+#ifdef DEBUG
+	Debug(DBG_INPUT, "Binary reader did first update");
+#endif
+
+	return true;
+	}
+
+// Reads up to chunk_size bytes into a newly allocated buffer and stores
+// it in *chunk; the buffer is handed over to the caller. Returns the
+// number of bytes read, or 0 (with no buffer) if nothing could be read.
+streamsize Binary::GetChunk(char** chunk)
+	{
+	if ( in->peek() == std::iostream::traits_type::eof() )
+		return 0;
+
+	if ( in->eof() == true || in->fail() == true )
+		return 0;
+
+	*chunk = new char[chunk_size];
+
+	in->read(*chunk, chunk_size);
+
+	streamsize bytes_read = in->gcount();
+
+	if ( ! bytes_read )
+		{
+		// Allocated with new[], so release with delete[].
+		delete[] *chunk;
+		*chunk = 0;
+		return 0;
+		}
+
+	// probably faster to just not resize if bytes_read < chunk_size, since
+	// length of valid data is known
+
+	return bytes_read;
+	}
+
+// Refreshes mtime from the file's current modification time. Returns 1 if
+// it advanced, 0 if it did not, and -1 if stat() failed.
+int Binary::UpdateModificationTime()
+	{
+	struct stat sb;
+
+	if ( stat(fname.c_str(), &sb) == -1 )
+		{
+		Error(Fmt("Could not get stat for %s", fname.c_str()));
+		return -1;
+		}
+
+	if ( sb.st_mtime <= mtime )
+		// no change
+		return 0;
+
+	mtime = sb.st_mtime;
+	return 1;
+	}
+
+// Reads whatever data is available from the file and passes it on in
+// chunks. Except on the first run, the file is first reopened (manual
+// mode, or reread mode when it changed) or its EOF state cleared (stream
+// mode).
+bool Binary::DoUpdate()
+	{
+	if ( firstrun )
+		firstrun = false;
+
+	else
+		{
+		switch ( Info().mode ) {
+		case MODE_REREAD:
+			{
+			switch ( UpdateModificationTime() ) {
+			case -1:
+				return false; // error
+			case 0:
+				return true; // no change
+			case 1:
+				break; // file changed. reread.
+			default:
+				assert(false);
+			}
+			// fallthrough
+			}
+
+		case MODE_MANUAL:
+		case MODE_STREAM:
+			if ( Info().mode == MODE_STREAM && in )
+				{
+				in->clear(); // remove end of file evil bits
+				break;
+				}
+
+			// "in" is null if a previous open failed; CloseInput()
+			// must only be called with a stream to close.
+			if ( in )
+				CloseInput();
+
+			if ( ! OpenInput() )
+				return false;
+
+			break;
+
+		default:
+			assert(false);
+		}
+		}
+
+	char* chunk = 0;
+	streamsize size = 0;
+	while ( (size = GetChunk(&chunk)) )
+		{
+		assert (NumFields() == 1);
+
+		Value** fields = new Value*[1];
+
+		// filter has exactly one text field. convert to it.
+		Value* val = new Value(TYPE_STRING, true);
+		val->val.string_val.data = chunk;
+		val->val.string_val.length = size;
+		fields[0] = val;
+
+		if ( Info().mode == MODE_STREAM )
+			Put(fields);
+		else
+			SendEntry(fields);
+		}
+
+	if ( Info().mode != MODE_STREAM )
+		EndCurrentSend();
+
+#ifdef DEBUG
+	Debug(DBG_INPUT, "DoUpdate finished successfully");
+#endif
+
+	return true;
+	}
+
+// Heartbeat handler: triggers an update in reread and stream mode, and
+// does nothing in manual mode.
+bool Binary::DoHeartbeat(double network_time, double current_time)
+	{
+	switch ( Info().mode ) {
+		case MODE_MANUAL:
+			// yay, we do nothing :)
+			break;
+
+		case MODE_REREAD:
+		case MODE_STREAM:
+#ifdef DEBUG
+	Debug(DBG_INPUT, "Starting Heartbeat update");
+#endif
+			Update();	// call update and not DoUpdate, because update
+					// checks disabled.
+#ifdef DEBUG
+	Debug(DBG_INPUT, "Finished with heartbeat update");
+#endif
+			break;
+		default:
+			assert(false);
+	}
+
+	return true;
+	}
diff --git a/src/input/readers/Binary.h b/src/input/readers/Binary.h
new file mode 100644
index 0000000000..2705800ab8
--- /dev/null
+++ b/src/input/readers/Binary.h
@@ -0,0 +1,48 @@
+// See the file "COPYING" in the main distribution directory for copyright.
+
+#ifndef INPUT_READERS_BINARY_H
+#define INPUT_READERS_BINARY_H
+
+#include "../ReaderBackend.h"
+#include <fstream>
+
+namespace input { namespace reader {
+
+/**
+ * Binary mode file reader.
+ */
+class Binary : public ReaderBackend {
+public:
+	Binary(ReaderFrontend* frontend);
+
+	~Binary();
+
+	static ReaderBackend* Instantiate(ReaderFrontend* frontend)
+		{ return new Binary(frontend); }
+
+protected:
+	virtual bool DoInit(const ReaderInfo& info, int arg_num_fields,
+	                    const threading::Field* const* fields);
+	virtual void DoClose();
+	virtual bool DoUpdate();
+	virtual bool DoHeartbeat(double network_time, double current_time);
+
+private:
+	bool OpenInput();
+	bool CloseInput();
+	streamsize GetChunk(char** chunk);	// returns bytes read; 0 if none
+	int UpdateModificationTime();	// 1: changed, 0: unchanged, -1: error
+
+	string fname;	// path of the file being read
+	ifstream* in;	// input stream; 0 after CloseInput()
+	time_t mtime;	// last modification time recorded for fname
+	bool firstrun;	// true until the first DoUpdate() call
+
+	// Read size per chunk; set from script-level InputBinary::chunk_size.
+	static streamsize chunk_size;
+};
+
+}
+}
+
+#endif /* INPUT_READERS_BINARY_H */
diff --git a/src/types.bif b/src/types.bif
index 888310419c..b800ce2e8e 100644
--- a/src/types.bif
+++ b/src/types.bif
@@ -196,6 +196,7 @@ enum Reader %{
 	READER_ASCII,
 	READER_RAW,
 	READER_BENCHMARK,
+	READER_BINARY,
 %}
 
 enum Event %{