FileAnalysis: add binary input reader and BIFs for sending in data.

This allows the input framework to feed files into Bro for analysis.
Jon Siwek 2013-03-06 12:59:54 -06:00
parent c330b46128
commit 00b2d34a8e
14 changed files with 399 additions and 29 deletions


@@ -51,8 +51,10 @@ export {
 		## from a container file as part of the analysis.
 		parent_file_id: string &log &optional;

-		## The network protocol over which the file was transferred.
-		protocol: string &log &optional;
+		## An identification of the source of the file data. E.g. it may be
+		## a network protocol over which it was transferred, or a local file
+		## path which was read, or some other input source.
+		source: string &log &optional;

 		## The set of connections over which the file was transferred,
 		## indicated by UID strings.
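Because the new field is &optional, script code that inspects a FileAnalysis::Info record should guard its access. A minimal sketch (the helper name and message text are illustrative, not part of this change):

function describe_source(info: FileAnalysis::Info): string
	{
	# source is &log &optional, so check presence before reading it.
	if ( info?$source )
		return fmt("file data came from %s", info$source);

	return "file data source unknown";
	}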


@@ -2,4 +2,5 @@
 @load ./readers/ascii
 @load ./readers/raw
 @load ./readers/benchmark
+@load ./readers/binary


@@ -0,0 +1,8 @@
##! Interface for the binary input reader.

module InputBinary;

export {
	## Size of data chunks to read from the input file at a time.
	const chunk_size = 1024 &redef;
}
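Since chunk_size is declared &redef, the read granularity can be tuned from a site's local scripts without touching the reader itself; for example (the value below is purely illustrative):

# Read the input file in 64 KiB chunks instead of the default 1024 bytes.
redef InputBinary::chunk_size = 64 * 1024;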


@@ -447,6 +447,7 @@ set(bro_SRCS
     input/readers/Ascii.cc
     input/readers/Raw.cc
     input/readers/Benchmark.cc
+    input/readers/Binary.cc
     file_analysis/Manager.cc
     file_analysis/Info.cc


@@ -93,3 +93,44 @@ function FileAnalysis::stop%(file_id: string%): bool
 	bool result = file_mgr->IgnoreFile(FileID(file_id->CheckString()));
 	return new Val(result, TYPE_BOOL);
 	%}
+
+function FileAnalysis::input_data%(source: string, data: string%): any
+	%{
+	string s = source->CheckString();
+	string unique = "BIF " + s;
+	file_mgr->DataIn(unique, data->Bytes(), data->Len(), 0, s);
+	return 0;
+	%}
+
+function FileAnalysis::input_data_chunk%(source: string, data: string,
+                                         offset: count%): any
+	%{
+	string s = source->CheckString();
+	string unique = "BIF " + s;
+	file_mgr->DataIn(unique, data->Bytes(), data->Len(), offset, 0, s);
+	return 0;
+	%}
+
+function FileAnalysis::gap%(source: string, offset: count, len: count%): any
+	%{
+	string s = source->CheckString();
+	string unique = "BIF " + s;
+	file_mgr->Gap(unique, offset, len, 0, s);
+	return 0;
+	%}
+
+function FileAnalysis::set_size%(source: string, size: count%): any
+	%{
+	string s = source->CheckString();
+	string unique = "BIF " + s;
+	file_mgr->SetSize(unique, size, 0, s);
+	return 0;
+	%}
+
+function FileAnalysis::input_eof%(source: string%): any
+	%{
+	string s = source->CheckString();
+	string unique = "BIF " + s;
+	file_mgr->EndOfFile(unique, 0, s);
+	return 0;
+	%}
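Taken together, these BIFs let a script push arbitrary byte strings through the same path that network analyzers use. A minimal sketch of driving them directly from script-land (the source label and payload string are made up for illustration):

event bro_init()
	{
	local payload = "illustrative file contents";

	# Announce the total size, feed the data sequentially, then signal EOF.
	FileAnalysis::set_size("example-source", |payload|);
	FileAnalysis::input_data("example-source", payload);
	FileAnalysis::input_eof("example-source");
	}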


@@ -37,7 +37,7 @@ static RecordVal* get_conn_id_val(const Connection* conn)
 int Info::file_id_idx = -1;
 int Info::parent_file_id_idx = -1;
-int Info::protocol_idx = -1;
+int Info::source_idx = -1;
 int Info::conn_uids_idx = -1;
 int Info::conn_ids_idx = -1;
 int Info::seen_bytes_idx = -1;
@@ -59,7 +59,7 @@ void Info::InitFieldIndices()
 	if ( file_id_idx != -1 ) return;
 	file_id_idx = Idx("file_id");
 	parent_file_id_idx = Idx("parent_file_id");
-	protocol_idx = Idx("protocol");
+	source_idx = Idx("source");
 	conn_uids_idx = Idx("conn_uids");
 	conn_ids_idx = Idx("conn_ids");
 	seen_bytes_idx = Idx("seen_bytes");
@@ -89,7 +89,7 @@ static void init_magic(magic_t* magic, int flags)
 		}
 	}

-Info::Info(const string& unique, Connection* conn, const string& protocol)
+Info::Info(const string& unique, Connection* conn, const string& source)
 	: file_id(unique), unique(unique), val(0), last_activity_time(network_time),
 	  postpone_timeout(false), need_reassembly(false), done(false),
 	  actions(this)
@@ -113,8 +113,8 @@ Info::Info(const string& unique, Connection* conn, const string& protocol)
 	UpdateConnectionFields(conn);

-	if ( protocol != "" )
-		val->Assign(protocol_idx, new StringVal(protocol.c_str()));
+	if ( ! source.empty() )
+		val->Assign(source_idx, new StringVal(source.c_str()));
 	}

 Info::~Info()


@@ -117,8 +117,7 @@ protected:
 	/**
 	 * Constructor; only file_analysis::Manager should be creating these.
 	 */
-	Info(const string& unique, Connection* conn = 0,
-	     const string& protocol = "");
+	Info(const string& unique, Connection* conn = 0, const string& source = "");

 	/**
 	 * Updates the "conn_ids" and "conn_uids" fields in #val record with the
@@ -190,7 +189,7 @@ protected:
 public:
	static int file_id_idx;
	static int parent_file_id_idx;
-	static int protocol_idx;
+	static int source_idx;
	static int conn_uids_idx;
	static int conn_ids_idx;
	static int seen_bytes_idx;


@@ -26,11 +26,11 @@ void Manager::Terminate()
 	}

 void Manager::DataIn(const string& unique, const u_char* data, uint64 len,
-                     uint64 offset, Connection* conn, const string& protocol)
+                     uint64 offset, Connection* conn, const string& source)
 	{
 	if ( IsIgnored(unique) ) return;

-	Info* info = GetInfo(unique, conn, protocol);
+	Info* info = GetInfo(unique, conn, source);

 	if ( ! info ) return;
@@ -41,9 +41,9 @@ void Manager::DataIn(const string& unique, const u_char* data, uint64 len,
 	}

 void Manager::DataIn(const string& unique, const u_char* data, uint64 len,
-                     Connection* conn, const string& protocol)
+                     Connection* conn, const string& source)
 	{
-	Info* info = GetInfo(unique, conn, protocol);
+	Info* info = GetInfo(unique, conn, source);

 	if ( ! info ) return;
@@ -54,18 +54,18 @@ void Manager::DataIn(const string& unique, const u_char* data, uint64 len,
 	}

 void Manager::EndOfFile(const string& unique, Connection* conn,
-                        const string& protocol)
+                        const string& source)
 	{
-	// Just call GetInfo because maybe the conn/protocol args will update
+	// Just call GetInfo because maybe the conn/source args will update
 	// something in the Info record.
-	GetInfo(unique, conn, protocol);
+	GetInfo(unique, conn, source);
 	RemoveFile(unique);
 	}

 void Manager::Gap(const string& unique, uint64 offset, uint64 len,
-                  Connection* conn, const string& protocol)
+                  Connection* conn, const string& source)
 	{
-	Info* info = GetInfo(unique, conn, protocol);
+	Info* info = GetInfo(unique, conn, source);

 	if ( ! info ) return;
@@ -73,9 +73,9 @@ void Manager::Gap(const string& unique, uint64 offset, uint64 len,
 	}

 void Manager::SetSize(const string& unique, uint64 size,
-                      Connection* conn, const string& protocol)
+                      Connection* conn, const string& source)
 	{
-	Info* info = GetInfo(unique, conn, protocol);
+	Info* info = GetInfo(unique, conn, source);

 	if ( ! info ) return;
@@ -132,7 +132,7 @@ bool Manager::RemoveAction(const FileID& file_id, const RecordVal* args) const
 	}

 Info* Manager::GetInfo(const string& unique, Connection* conn,
-                       const string& protocol)
+                       const string& source)
 	{
 	if ( IsIgnored(unique) ) return 0;
@@ -140,7 +140,7 @@ Info* Manager::GetInfo(const string& unique, Connection* conn,
 	if ( ! rval )
 		{
-		rval = str_map[unique] = new Info(unique, conn, protocol);
+		rval = str_map[unique] = new Info(unique, conn, source);
 		FileID id = rval->GetFileID();

 		if ( id_map[id] )


@@ -35,31 +35,31 @@ public:
 	 */
 	void DataIn(const string& unique, const u_char* data, uint64 len,
 	            uint64 offset, Connection* conn = 0,
-	            const string& protocol = "");
+	            const string& source = "");

 	/**
 	 * Pass in sequential file data.
 	 */
 	void DataIn(const string& unique, const u_char* data, uint64 len,
-	            Connection* conn = 0, const string& protocol = "");
+	            Connection* conn = 0, const string& source = "");

 	/**
 	 * Signal the end of file data.
 	 */
 	void EndOfFile(const string& unique, Connection* conn = 0,
-	               const string& protocol = "");
+	               const string& source = "");

 	/**
 	 * Signal a gap in the file data stream.
 	 */
 	void Gap(const string& unique, uint64 offset, uint64 len,
-	         Connection* conn = 0, const string& protocol = "");
+	         Connection* conn = 0, const string& source = "");

 	/**
 	 * Provide the expected number of bytes that comprise a file.
 	 */
 	void SetSize(const string& unique, uint64 size, Connection* conn = 0,
-	             const string& protocol = "");
+	             const string& source = "");

 	/**
 	 * Starts ignoring a file, which will finally be removed from internal
@@ -109,7 +109,7 @@ protected:
 	 * record value may be updated.
 	 */
 	Info* GetInfo(const string& unique, Connection* conn = 0,
-	              const string& protocol = "");
+	              const string& source = "");

 	/**
 	 * @return the Info object mapped to \a file_id, or a null pointer if no


@@ -57,3 +57,6 @@ const autospread: double;
 const addfactor: count;
 const stopspreadat: count;
 const timedspread: double;
+
+module InputBinary;
+const chunk_size: count;


@@ -8,6 +8,7 @@
 #include "readers/Ascii.h"
 #include "readers/Raw.h"
 #include "readers/Benchmark.h"
+#include "readers/Binary.h"

 #include "Event.h"
 #include "EventHandler.h"
@@ -34,6 +35,7 @@ ReaderDefinition input_readers[] = {
 	{ BifEnum::Input::READER_ASCII, "Ascii", 0, reader::Ascii::Instantiate },
 	{ BifEnum::Input::READER_RAW, "Raw", 0, reader::Raw::Instantiate },
 	{ BifEnum::Input::READER_BENCHMARK, "Benchmark", 0, reader::Benchmark::Instantiate },
+	{ BifEnum::Input::READER_BINARY, "Binary", 0, reader::Binary::Instantiate },

 	// End marker
 	{ BifEnum::Input::READER_DEFAULT, "None", 0, (ReaderBackend* (*)(ReaderFrontend* frontend))0 }

src/input/readers/Binary.cc (new file)

@@ -0,0 +1,264 @@
// See the file "COPYING" in the main distribution directory for copyright.

#include <sys/stat.h>

#include "Binary.h"
#include "NetVar.h"

#include "../../threading/SerialTypes.h"

using namespace input::reader;
using threading::Value;
using threading::Field;

streamsize Binary::chunk_size = 0;

Binary::Binary(ReaderFrontend *frontend)
	: ReaderBackend(frontend), in(0), mtime(0), firstrun(true)
	{
	if ( ! chunk_size )
		{
		chunk_size = BifConst::InputBinary::chunk_size;

		if ( ! chunk_size )
			chunk_size = 1024;
		}
	}

Binary::~Binary()
	{
	DoClose();
	}

void Binary::DoClose()
	{
	if ( in )
		CloseInput();
	}

bool Binary::OpenInput()
	{
	in = new ifstream(fname.c_str(), ios_base::in | ios_base::binary);

	if ( in->fail() )
		{
		Error(Fmt("Init: cannot open %s", fname.c_str()));
		return false;
		}

	return true;
	}

bool Binary::CloseInput()
	{
	if ( ! in || ! in->is_open() )
		{
		InternalError(Fmt("Trying to close closed file for stream %s",
		                  fname.c_str()));
		return false;
		}

#ifdef DEBUG
	Debug(DBG_INPUT, "Binary reader starting close");
#endif

	in->close();
	delete in;
	in = 0;

#ifdef DEBUG
	Debug(DBG_INPUT, "Binary reader finished close");
#endif

	return true;
	}

bool Binary::DoInit(const ReaderInfo& info, int num_fields,
                    const Field* const* fields)
	{
	in = 0;
	mtime = 0;
	firstrun = true;

	if ( ! info.source || strlen(info.source) == 0 )
		{
		Error("No source path provided");
		return false;
		}

	if ( num_fields != 1 )
		{
		Error("Filter for binary reader contains more than one field. Filters "
		      "for binary reader must contain exactly one string field. "
		      "Filter ignored.");
		return false;
		}

	if ( fields[0]->type != TYPE_STRING )
		{
		Error("Filter for binary reader contains a non-string field.");
		return false;
		}

	// do Initialization
	fname = info.source;

	if ( ! OpenInput() ) return false;

	if ( UpdateModificationTime() == -1 ) return false;

#ifdef DEBUG
	Debug(DBG_INPUT, "Binary reader created, will perform first update");
#endif

	// after initialization - do update
	DoUpdate();

#ifdef DEBUG
	Debug(DBG_INPUT, "Binary reader did first update");
#endif

	return true;
	}
streamsize Binary::GetChunk(char** chunk)
	{
	if ( in->peek() == std::iostream::traits_type::eof() )
		return 0;

	if ( in->eof() == true || in->fail() == true )
		return 0;

	*chunk = new char[chunk_size];
	in->read(*chunk, chunk_size);
	streamsize bytes_read = in->gcount();

	if ( ! bytes_read )
		{
		// Array form of delete to match the new[] above.
		delete [] *chunk;
		*chunk = 0;
		return 0;
		}

	// probably faster to just not resize if bytes_read < chunk_size, since
	// length of valid data is known
	return bytes_read;
	}
int Binary::UpdateModificationTime()
	{
	struct stat sb;

	if ( stat(fname.c_str(), &sb) == -1 )
		{
		Error(Fmt("Could not get stat for %s", fname.c_str()));
		return -1;
		}

	if ( sb.st_mtime <= mtime )
		// no change
		return 0;

	mtime = sb.st_mtime;
	return 1;
	}

// read the entire file and send appropriate thingies back to InputMgr
bool Binary::DoUpdate()
	{
	if ( firstrun )
		firstrun = false;

	else
		{
		switch ( Info().mode ) {
		case MODE_REREAD:
			{
			switch ( UpdateModificationTime() ) {
			case -1:
				return false; // error
			case 0:
				return true; // no change
			case 1:
				break; // file changed. reread.
			default:
				assert(false);
			}
			// fallthrough
			}

		case MODE_MANUAL:
		case MODE_STREAM:
			if ( Info().mode == MODE_STREAM && in )
				{
				in->clear(); // remove end of file evil bits
				break;
				}

			CloseInput();

			if ( ! OpenInput() )
				return false;

			break;

		default:
			assert(false);
		}
		}

	char* chunk = 0;
	streamsize size = 0;

	while ( (size = GetChunk(&chunk)) )
		{
		assert(NumFields() == 1);

		Value** fields = new Value*[1];

		// filter has exactly one text field. convert to it.
		Value* val = new Value(TYPE_STRING, true);
		val->val.string_val.data = chunk;
		val->val.string_val.length = size;
		fields[0] = val;

		if ( Info().mode == MODE_STREAM )
			Put(fields);
		else
			SendEntry(fields);
		}

	if ( Info().mode != MODE_STREAM )
		EndCurrentSend();

#ifdef DEBUG
	Debug(DBG_INPUT, "DoUpdate finished successfully");
#endif

	return true;
	}

bool Binary::DoHeartbeat(double network_time, double current_time)
	{
	switch ( Info().mode ) {
	case MODE_MANUAL:
		// yay, we do nothing :)
		break;

	case MODE_REREAD:
	case MODE_STREAM:
#ifdef DEBUG
		Debug(DBG_INPUT, "Starting Heartbeat update");
#endif
		Update(); // call update and not DoUpdate, because update
		          // checks disabled.
#ifdef DEBUG
		Debug(DBG_INPUT, "Finished with heartbeat update");
#endif
		break;

	default:
		assert(false);
	}

	return true;
	}
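The reader delivers each chunk as a single string value per event, which is what makes the end-to-end path in this commit possible: the input framework reads a file and a script forwards the chunks into file analysis via the new BIFs. A rough sketch of that wiring in Bro script (module, event, stream name, and file path are hypothetical; it assumes the input framework's existing Input::add_event and Input::end_of_data interface):

module FeedFile;

export {
	## One chunk of the input file as delivered by the binary reader.
	type Chunk: record {
		data: string;
	};

	## Raised by the input framework for every chunk that is read.
	global chunk: event(desc: Input::EventDescription, tpe: Input::Event,
	                    data: string);
}

event FeedFile::chunk(desc: Input::EventDescription, tpe: Input::Event, data: string)
	{
	# Forward each chunk into file analysis, keyed by the file's path.
	FileAnalysis::input_data(desc$source, data);
	}

event Input::end_of_data(name: string, source: string)
	{
	if ( name == "example_file" )
		FileAnalysis::input_eof(source);
	}

event bro_init()
	{
	Input::add_event([$source="/tmp/example.dat",
	                  $reader=Input::READER_BINARY,
	                  $mode=Input::STREAM,
	                  $name="example_file",
	                  $fields=Chunk,
	                  $ev=FeedFile::chunk,
	                  $want_record=F]);
	}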


@@ -0,0 +1,48 @@
// See the file "COPYING" in the main distribution directory for copyright.

#ifndef INPUT_READERS_BINARY_H
#define INPUT_READERS_BINARY_H

#include "../ReaderBackend.h"

#include <fstream>

namespace input { namespace reader {

/**
 * Binary mode file reader.
 */
class Binary : public ReaderBackend {
public:
	Binary(ReaderFrontend* frontend);
	~Binary();

	static ReaderBackend* Instantiate(ReaderFrontend* frontend)
		{ return new Binary(frontend); }

protected:
	virtual bool DoInit(const ReaderInfo& info, int arg_num_fields,
	                    const threading::Field* const* fields);

	virtual void DoClose();

	virtual bool DoUpdate();

	virtual bool DoHeartbeat(double network_time, double current_time);

private:
	bool OpenInput();
	bool CloseInput();

	streamsize GetChunk(char** chunk);
	int UpdateModificationTime();

	string fname;
	ifstream* in;
	time_t mtime;
	bool firstrun;

	// options set from the script-level.
	static streamsize chunk_size;
};

}
}

#endif /* INPUT_READERS_BINARY_H */


@@ -196,6 +196,7 @@ enum Reader %{
 	READER_ASCII,
 	READER_RAW,
 	READER_BENCHMARK,
+	READER_BINARY,
 %}

 enum Event %{