Mirror of https://github.com/zeek/zeek.git
FileAnalysis: add binary input reader and BIFs for sending in data.

This allows the input framework to feed files into Bro for analysis.

parent c330b46128
commit 00b2d34a8e

14 changed files with 399 additions and 29 deletions
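For context (not part of the commit itself): a minimal sketch of how the two pieces are intended to fit together, assuming the script-level Input framework API of the time (Input::add_event, Input::EventDescription, Input::end_of_data). The record type FileChunk, the event names, the file path, and the stream name below are invented for illustration.

type FileChunk: record {
	data: string;
};

event file_chunk(desc: Input::EventDescription, tpe: Input::Event, data: string)
	{
	# Forward each chunk, in order, to the file analysis framework.
	FileAnalysis::input_data(desc$source, data);
	}

event Input::end_of_data(name: string, source: string)
	{
	if ( name == "my_binary_file" )
		FileAnalysis::input_eof(source);
	}

event bro_init()
	{
	# The binary reader delivers the file as successive chunks in a
	# single string field.
	Input::add_event([$source="/tmp/example.bin",
	                  $reader=Input::READER_BINARY,
	                  $mode=Input::STREAM,
	                  $name="my_binary_file",
	                  $fields=FileChunk,
	                  $ev=file_chunk]);
	}

The BIFs added further down derive the file's identifier from the source string, so repeatedly passing desc$source keeps all chunks attached to the same file.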
@@ -51,8 +51,10 @@ export {
 		## from a container file as part of the analysis.
 		parent_file_id: string &log &optional;

-		## The network protocol over which the file was transferred.
-		protocol: string &log &optional;
+		## An identification of the source of the file data. E.g. it may be
+		## a network protocol over which it was transferred, or a local file
+		## path which was read, or some other input source.
+		source: string &log &optional;

 		## The set of connections over which the file was transferred,
 		## indicated by UID strings.
@@ -2,4 +2,5 @@
 @load ./readers/ascii
 @load ./readers/raw
 @load ./readers/benchmark
+@load ./readers/binary

scripts/base/frameworks/input/readers/binary.bro (new file, 8 lines)

##! Interface for the binary input reader.

module InputBinary;

export {
	## Size of data chunks to read from the input file at a time.
	const chunk_size = 1024 &redef;
}
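Since chunk_size is declared &redef, a site can tune the read size from its own scripts (the value here is only an example):

redef InputBinary::chunk_size = 4096;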
@@ -447,6 +447,7 @@ set(bro_SRCS
 	input/readers/Ascii.cc
 	input/readers/Raw.cc
 	input/readers/Benchmark.cc
+	input/readers/Binary.cc

 	file_analysis/Manager.cc
 	file_analysis/Info.cc
@@ -93,3 +93,44 @@ function FileAnalysis::stop%(file_id: string%): bool
 	bool result = file_mgr->IgnoreFile(FileID(file_id->CheckString()));
 	return new Val(result, TYPE_BOOL);
 	%}
+
+function FileAnalysis::input_data%(source: string, data: string%): any
+	%{
+	string s = source->CheckString();
+	string unique = "BIF " + s;
+	file_mgr->DataIn(unique, data->Bytes(), data->Len(), 0, s);
+	return 0;
+	%}
+
+function FileAnalysis::input_data_chunk%(source: string, data: string,
+                                          offset: count%): any
+	%{
+	string s = source->CheckString();
+	string unique = "BIF " + s;
+	file_mgr->DataIn(unique, data->Bytes(), data->Len(), offset, 0, s);
+	return 0;
+	%}
+
+function FileAnalysis::gap%(source: string, offset: count, len: count%): any
+	%{
+	string s = source->CheckString();
+	string unique = "BIF " + s;
+	file_mgr->Gap(unique, offset, len, 0, s);
+	return 0;
+	%}
+
+function FileAnalysis::set_size%(source: string, size: count%): any
+	%{
+	string s = source->CheckString();
+	string unique = "BIF " + s;
+	file_mgr->SetSize(unique, size, 0, s);
+	return 0;
+	%}
+
+function FileAnalysis::input_eof%(source: string%): any
+	%{
+	string s = source->CheckString();
+	string unique = "BIF " + s;
+	file_mgr->EndOfFile(unique, 0, s);
+	return 0;
+	%}
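Taken together, these BIFs let a script hand Bro a file whose pieces arrive out of order or with holes. A rough sketch of the calling pattern (the source label and the literal data are invented for illustration):

event bro_init()
	{
	local src = "example-transfer";

	# Chunks carry an absolute byte offset and may arrive in any order.
	FileAnalysis::input_data_chunk(src, "world", 6);
	FileAnalysis::input_data_chunk(src, "hello ", 0);

	# A region known to be missing can be reported explicitly, and the
	# expected total size can be announced if it is known:
	# FileAnalysis::gap(src, 11, 5);
	# FileAnalysis::set_size(src, 16);

	# Signal that no more data will arrive for this source.
	FileAnalysis::input_eof(src);
	}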
@@ -37,7 +37,7 @@ static RecordVal* get_conn_id_val(const Connection* conn)

 int Info::file_id_idx = -1;
 int Info::parent_file_id_idx = -1;
-int Info::protocol_idx = -1;
+int Info::source_idx = -1;
 int Info::conn_uids_idx = -1;
 int Info::conn_ids_idx = -1;
 int Info::seen_bytes_idx = -1;

@@ -59,7 +59,7 @@ void Info::InitFieldIndices()
 	if ( file_id_idx != -1 ) return;
 	file_id_idx = Idx("file_id");
 	parent_file_id_idx = Idx("parent_file_id");
-	protocol_idx = Idx("protocol");
+	source_idx = Idx("source");
 	conn_uids_idx = Idx("conn_uids");
 	conn_ids_idx = Idx("conn_ids");
 	seen_bytes_idx = Idx("seen_bytes");

@@ -89,7 +89,7 @@ static void init_magic(magic_t* magic, int flags)
 		}
 	}

-Info::Info(const string& unique, Connection* conn, const string& protocol)
+Info::Info(const string& unique, Connection* conn, const string& source)
 	: file_id(unique), unique(unique), val(0), last_activity_time(network_time),
 	  postpone_timeout(false), need_reassembly(false), done(false),
 	  actions(this)

@@ -113,8 +113,8 @@ Info::Info(const string& unique, Connection* conn, const string& protocol)

 	UpdateConnectionFields(conn);

-	if ( protocol != "" )
-		val->Assign(protocol_idx, new StringVal(protocol.c_str()));
+	if ( ! source.empty() )
+		val->Assign(source_idx, new StringVal(source.c_str()));
 	}

 Info::~Info()
@@ -117,8 +117,7 @@ protected:
 	/**
 	 * Constructor; only file_analysis::Manager should be creating these.
 	 */
-	Info(const string& unique, Connection* conn = 0,
-	     const string& protocol = "");
+	Info(const string& unique, Connection* conn = 0, const string& source = "");

 	/**
 	 * Updates the "conn_ids" and "conn_uids" fields in #val record with the

@@ -190,7 +189,7 @@ protected:
 public:
 	static int file_id_idx;
 	static int parent_file_id_idx;
-	static int protocol_idx;
+	static int source_idx;
 	static int conn_uids_idx;
 	static int conn_ids_idx;
 	static int seen_bytes_idx;
@@ -26,11 +26,11 @@ void Manager::Terminate()
 	}

 void Manager::DataIn(const string& unique, const u_char* data, uint64 len,
-                     uint64 offset, Connection* conn, const string& protocol)
+                     uint64 offset, Connection* conn, const string& source)
 	{
 	if ( IsIgnored(unique) ) return;

-	Info* info = GetInfo(unique, conn, protocol);
+	Info* info = GetInfo(unique, conn, source);

 	if ( ! info ) return;

@@ -41,9 +41,9 @@ void Manager::DataIn(const string& unique, const u_char* data, uint64 len,
 	}

 void Manager::DataIn(const string& unique, const u_char* data, uint64 len,
-                     Connection* conn, const string& protocol)
+                     Connection* conn, const string& source)
 	{
-	Info* info = GetInfo(unique, conn, protocol);
+	Info* info = GetInfo(unique, conn, source);

 	if ( ! info ) return;

@@ -54,18 +54,18 @@ void Manager::DataIn(const string& unique, const u_char* data, uint64 len,
 	}

 void Manager::EndOfFile(const string& unique, Connection* conn,
-                        const string& protocol)
+                        const string& source)
 	{
-	// Just call GetInfo because maybe the conn/protocol args will update
+	// Just call GetInfo because maybe the conn/source args will update
 	// something in the Info record.
-	GetInfo(unique, conn, protocol);
+	GetInfo(unique, conn, source);
 	RemoveFile(unique);
 	}

 void Manager::Gap(const string& unique, uint64 offset, uint64 len,
-                  Connection* conn, const string& protocol)
+                  Connection* conn, const string& source)
 	{
-	Info* info = GetInfo(unique, conn, protocol);
+	Info* info = GetInfo(unique, conn, source);

 	if ( ! info ) return;

@@ -73,9 +73,9 @@ void Manager::Gap(const string& unique, uint64 offset, uint64 len,
 	}

 void Manager::SetSize(const string& unique, uint64 size,
-                      Connection* conn, const string& protocol)
+                      Connection* conn, const string& source)
 	{
-	Info* info = GetInfo(unique, conn, protocol);
+	Info* info = GetInfo(unique, conn, source);

 	if ( ! info ) return;

@@ -132,7 +132,7 @@ bool Manager::RemoveAction(const FileID& file_id, const RecordVal* args) const
 	}

 Info* Manager::GetInfo(const string& unique, Connection* conn,
-                       const string& protocol)
+                       const string& source)
 	{
 	if ( IsIgnored(unique) ) return 0;

@@ -140,7 +140,7 @@ Info* Manager::GetInfo(const string& unique, Connection* conn,

 	if ( ! rval )
 		{
-		rval = str_map[unique] = new Info(unique, conn, protocol);
+		rval = str_map[unique] = new Info(unique, conn, source);
 		FileID id = rval->GetFileID();

 		if ( id_map[id] )
@@ -35,31 +35,31 @@ public:
 	 */
 	void DataIn(const string& unique, const u_char* data, uint64 len,
 	            uint64 offset, Connection* conn = 0,
-	            const string& protocol = "");
+	            const string& source = "");

 	/**
 	 * Pass in sequential file data.
 	 */
 	void DataIn(const string& unique, const u_char* data, uint64 len,
-	            Connection* conn = 0, const string& protocol = "");
+	            Connection* conn = 0, const string& source = "");

 	/**
 	 * Signal the end of file data.
 	 */
 	void EndOfFile(const string& unique, Connection* conn = 0,
-	               const string& protocol = "");
+	               const string& source = "");

 	/**
 	 * Signal a gap in the file data stream.
 	 */
 	void Gap(const string& unique, uint64 offset, uint64 len,
-	         Connection* conn = 0, const string& protocol = "");
+	         Connection* conn = 0, const string& source = "");

 	/**
 	 * Provide the expected number of bytes that comprise a file.
 	 */
 	void SetSize(const string& unique, uint64 size, Connection* conn = 0,
-	             const string& protocol = "");
+	             const string& source = "");

 	/**
 	 * Starts ignoring a file, which will finally be removed from internal

@@ -109,7 +109,7 @@ protected:
 	 * record value may be updated.
 	 */
 	Info* GetInfo(const string& unique, Connection* conn = 0,
-	              const string& protocol = "");
+	              const string& source = "");

 	/**
 	 * @return the Info object mapped to \a file_id, or a null pointer if no
@@ -57,3 +57,6 @@ const autospread: double;
 const addfactor: count;
 const stopspreadat: count;
 const timedspread: double;
+
+module InputBinary;
+const chunk_size: count;
@@ -8,6 +8,7 @@
 #include "readers/Ascii.h"
 #include "readers/Raw.h"
 #include "readers/Benchmark.h"
+#include "readers/Binary.h"

 #include "Event.h"
 #include "EventHandler.h"

@@ -34,6 +35,7 @@ ReaderDefinition input_readers[] = {
 	{ BifEnum::Input::READER_ASCII, "Ascii", 0, reader::Ascii::Instantiate },
 	{ BifEnum::Input::READER_RAW, "Raw", 0, reader::Raw::Instantiate },
 	{ BifEnum::Input::READER_BENCHMARK, "Benchmark", 0, reader::Benchmark::Instantiate },
+	{ BifEnum::Input::READER_BINARY, "Binary", 0, reader::Binary::Instantiate },

 	// End marker
 	{ BifEnum::Input::READER_DEFAULT, "None", 0, (ReaderBackend* (*)(ReaderFrontend* frontend))0 }
src/input/readers/Binary.cc (new file, 264 lines)

// See the file "COPYING" in the main distribution directory for copyright.

#include <sys/stat.h>

#include "Binary.h"
#include "NetVar.h"

#include "../../threading/SerialTypes.h"

using namespace input::reader;
using threading::Value;
using threading::Field;

streamsize Binary::chunk_size = 0;

Binary::Binary(ReaderFrontend *frontend)
	: ReaderBackend(frontend), in(0), mtime(0), firstrun(true)
	{
	if ( ! chunk_size )
		{
		chunk_size = BifConst::InputBinary::chunk_size;
		if ( ! chunk_size )
			chunk_size = 1024;
		}
	}

Binary::~Binary()
	{
	DoClose();
	}

void Binary::DoClose()
	{
	if ( in )
		CloseInput();
	}

bool Binary::OpenInput()
	{
	in = new ifstream(fname.c_str(), ios_base::in | ios_base::binary);

	if ( in->fail() )
		{
		Error(Fmt("Init: cannot open %s", fname.c_str()));
		return false;
		}

	return true;
	}

bool Binary::CloseInput()
	{
	if ( ! in || ! in->is_open() )
		{
		InternalError(Fmt("Trying to close closed file for stream %s",
		                  fname.c_str()));
		return false;
		}

#ifdef DEBUG
	Debug(DBG_INPUT, "Binary reader starting close");
#endif

	in->close();
	delete in;
	in = 0;

#ifdef DEBUG
	Debug(DBG_INPUT, "Binary reader finished close");
#endif

	return true;
	}

bool Binary::DoInit(const ReaderInfo& info, int num_fields,
                    const Field* const* fields)
	{
	in = 0;
	mtime = 0;
	firstrun = true;

	if ( ! info.source || strlen(info.source) == 0 )
		{
		Error("No source path provided");
		return false;
		}

	if ( num_fields != 1 )
		{
		Error("Filter for binary reader contains more than one field. Filters "
		      "for binary reader must contain exactly one string field. "
		      "Filter ignored.");
		return false;
		}

	if ( fields[0]->type != TYPE_STRING )
		{
		Error("Filter for binary reader contains a non-string field.");
		return false;
		}

	// do Initialization
	fname = info.source;

	if ( ! OpenInput() ) return false;

	if ( UpdateModificationTime() == -1 ) return false;

#ifdef DEBUG
	Debug(DBG_INPUT, "Binary reader created, will perform first update");
#endif

	// after initialization - do update
	DoUpdate();

#ifdef DEBUG
	Debug(DBG_INPUT, "Binary reader did first update");
#endif

	return true;
	}

streamsize Binary::GetChunk(char** chunk)
	{
	if ( in->peek() == std::iostream::traits_type::eof() )
		return 0;

	if ( in->eof() == true || in->fail() == true )
		return 0;

	*chunk = new char[chunk_size];

	in->read(*chunk, chunk_size);

	streamsize bytes_read = in->gcount();

	if ( ! bytes_read )
		{
		delete *chunk;
		*chunk = 0;
		return 0;
		}

	// probably faster to just not resize if bytes_read < chunk_size, since
	// length of valid data is known

	return bytes_read;
	}

int Binary::UpdateModificationTime()
	{
	struct stat sb;

	if ( stat(fname.c_str(), &sb) == -1 )
		{
		Error(Fmt("Could not get stat for %s", fname.c_str()));
		return -1;
		}

	if ( sb.st_mtime <= mtime )
		// no change
		return 0;

	mtime = sb.st_mtime;
	return 1;
	}

// read the entire file and send appropriate thingies back to InputMgr
bool Binary::DoUpdate()
	{
	if ( firstrun )
		firstrun = false;

	else
		{
		switch ( Info().mode ) {
		case MODE_REREAD:
			{
			switch ( UpdateModificationTime() ) {
			case -1:
				return false; // error
			case 0:
				return true; // no change
			case 1:
				break; // file changed. reread.
			default:
				assert(false);
			}
			// fallthrough
			}

		case MODE_MANUAL:
		case MODE_STREAM:
			if ( Info().mode == MODE_STREAM && in )
				{
				in->clear(); // remove end of file evil bits
				break;
				}

			CloseInput();
			if ( ! OpenInput() )
				return false;

			break;

		default:
			assert(false);
		}
		}

	char* chunk = 0;
	streamsize size = 0;
	while ( (size = GetChunk(&chunk)) )
		{
		assert (NumFields() == 1);

		Value** fields = new Value*[1];

		// filter has exactly one text field. convert to it.
		Value* val = new Value(TYPE_STRING, true);
		val->val.string_val.data = chunk;
		val->val.string_val.length = size;
		fields[0] = val;

		if ( Info().mode == MODE_STREAM )
			Put(fields);
		else
			SendEntry(fields);
		}

	if ( Info().mode != MODE_STREAM )
		EndCurrentSend();

#ifdef DEBUG
	Debug(DBG_INPUT, "DoUpdate finished successfully");
#endif

	return true;
	}

bool Binary::DoHeartbeat(double network_time, double current_time)
	{
	switch ( Info().mode ) {
	case MODE_MANUAL:
		// yay, we do nothing :)
		break;

	case MODE_REREAD:
	case MODE_STREAM:
#ifdef DEBUG
		Debug(DBG_INPUT, "Starting Heartbeat update");
#endif
		Update(); // call update and not DoUpdate, because update
		          // checks disabled.
#ifdef DEBUG
		Debug(DBG_INPUT, "Finished with heartbeat update");
#endif
		break;
	default:
		assert(false);
	}

	return true;
	}
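Given the mtime check above, a stream opened in re-read mode gets the file delivered again whenever it changes on disk. A hypothetical script-level sketch (the record, event, path and stream name are made up):

type Blob: record {
	data: string;
};

event blob_chunk(desc: Input::EventDescription, tpe: Input::Event, data: string)
	{
	print fmt("got %d bytes from %s", |data|, desc$source);
	}

event bro_init()
	{
	Input::add_event([$source="/var/data/firmware.img",
	                  $reader=Input::READER_BINARY,
	                  $mode=Input::REREAD,
	                  $name="firmware",
	                  $fields=Blob,
	                  $ev=blob_chunk]);
	}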
src/input/readers/Binary.h (new file, 48 lines)

// See the file "COPYING" in the main distribution directory for copyright.

#ifndef INPUT_READERS_BINARY_H
#define INPUT_READERS_BINARY_H

#include "../ReaderBackend.h"
#include <fstream>

namespace input { namespace reader {

/**
 * Binary mode file reader.
 */
class Binary : public ReaderBackend {
public:
	Binary(ReaderFrontend* frontend);

	~Binary();

	static ReaderBackend* Instantiate(ReaderFrontend* frontend)
		{ return new Binary(frontend); }

protected:
	virtual bool DoInit(const ReaderInfo& info, int arg_num_fields,
	                    const threading::Field* const* fields);
	virtual void DoClose();
	virtual bool DoUpdate();
	virtual bool DoHeartbeat(double network_time, double current_time);

private:
	bool OpenInput();
	bool CloseInput();
	streamsize GetChunk(char** chunk);
	int UpdateModificationTime();

	string fname;
	ifstream* in;
	time_t mtime;
	bool firstrun;

	// options set from the script-level.
	static streamsize chunk_size;
};

}
}

#endif /* INPUT_READERS_BINARY_H */
@@ -196,6 +196,7 @@ enum Reader %{
	READER_ASCII,
	READER_RAW,
	READER_BENCHMARK,
+	READER_BINARY,
 %}

 enum Event %{