Merge remote-tracking branch 'origin/master' into topic/robin/plugins

Thanks to git this merge was less troublesome than I was afraid it
would be. Not all tests pass yet, though (and file hashes have
unfortunately changed).

Conflicts:
	cmake
	doc/scripts/DocSourcesList.cmake
	scripts/base/init-bare.bro
	scripts/base/protocols/ftp/main.bro
	scripts/base/protocols/irc/dcc-send.bro
	scripts/test-all-policy.bro
	src/AnalyzerTags.h
	src/CMakeLists.txt
	src/analyzer/Analyzer.cc
	src/analyzer/protocol/file/File.cc
	src/analyzer/protocol/file/File.h
	src/analyzer/protocol/http/HTTP.cc
	src/analyzer/protocol/http/HTTP.h
	src/analyzer/protocol/mime/MIME.cc
	src/event.bif
	src/main.cc
	src/util-config.h.in
	testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log
	testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log
	testing/btest/Baseline/istate.events-ssl/receiver.http.log
	testing/btest/Baseline/istate.events-ssl/sender.http.log
	testing/btest/Baseline/istate.events/receiver.http.log
	testing/btest/Baseline/istate.events/sender.http.log
Author: Robin Sommer
Date:   2013-05-16 17:58:48 -07:00
Commit: eb637f9f3e

411 changed files with 240276 additions and 161868 deletions

src/file_analysis/Analyzer.h (new file)
@@ -0,0 +1,104 @@
// See the file "COPYING" in the main distribution directory for copyright.
#ifndef FILE_ANALYSIS_ANALYZER_H
#define FILE_ANALYSIS_ANALYZER_H
#include "Val.h"
#include "NetVar.h"
namespace file_analysis {
typedef BifEnum::FileAnalysis::Analyzer FA_Tag;
class File;
/**
* Base class for analyzers that can be attached to file_analysis::File objects.
*/
class Analyzer {
public:
virtual ~Analyzer()
{
DBG_LOG(DBG_FILE_ANALYSIS, "Destroy file analyzer %d", tag);
Unref(args);
}
/**
* Subclasses may override this to receive file data non-sequentially.
* @return true if the analyzer is still in a valid state to continue
* receiving data/events or false if it's essentially "done".
*/
virtual bool DeliverChunk(const u_char* data, uint64 len, uint64 offset)
{ return true; }
/**
* Subclasses may override this to receive file data sequentially.
* @return true if the analyzer is still in a valid state to continue
* receiving data/events or false if it's essentially "done".
*/
virtual bool DeliverStream(const u_char* data, uint64 len)
{ return true; }
/**
* Subclasses may override this to specifically handle an EOF signal,
* which means no more data is going to be incoming and the analyzer
* may be deleted/cleaned up soon.
* @return true if the analyzer is still in a valid state to continue
* receiving data/events or false if it's essentially "done".
*/
virtual bool EndOfFile()
{ return true; }
/**
* Subclasses may override this to handle missing data in a file stream.
* @return true if the analyzer is still in a valid state to continue
* receiving data/events or false if it's essentially "done".
*/
virtual bool Undelivered(uint64 offset, uint64 len)
{ return true; }
/**
* @return the analyzer type enum value.
*/
FA_Tag Tag() const { return tag; }
/**
* @return the AnalyzerArgs associated with the analyzer.
*/
RecordVal* Args() const { return args; }
/**
* @return the file_analysis::File object to which the analyzer is attached.
*/
File* GetFile() const { return file; }
/**
* @return the analyzer tag equivalent of the 'tag' field from the
* AnalyzerArgs value \a args.
*/
static FA_Tag ArgsTag(const RecordVal* args)
{
using BifType::Record::FileAnalysis::AnalyzerArgs;
return static_cast<FA_Tag>(
args->Lookup(AnalyzerArgs->FieldOffset("tag"))->AsEnum());
}
protected:
Analyzer(RecordVal* arg_args, File* arg_file)
: tag(file_analysis::Analyzer::ArgsTag(arg_args)),
args(arg_args->Ref()->AsRecordVal()),
file(arg_file)
{}
private:
FA_Tag tag;
RecordVal* args;
File* file;
};
typedef file_analysis::Analyzer* (*AnalyzerInstantiator)(RecordVal* args,
File* file);
} // namespace file_analysis
#endif
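
The class above is the extension point for new file analyzers. As a purely illustrative sketch (not part of this commit), a minimal subclass only needs to override the delivery hooks it cares about and expose an Instantiate() factory, mirroring the DataEvent/Extract/Hash analyzers added below; the name ByteCounter is hypothetical, and actually wiring it up would additionally require an enum value in file_analysis.bif plus an entry in the analyzer_factory table in AnalyzerSet.cc.

// Hypothetical sketch only (not in this commit): a trivial analyzer that
// counts the bytes delivered to it and stops once the file ends.
class ByteCounter : public file_analysis::Analyzer {
public:
	static file_analysis::Analyzer* Instantiate(RecordVal* args, File* file)
		{ return new ByteCounter(args, file); }

	virtual bool DeliverStream(const u_char* data, uint64 len)
		{ n += len; return true; }	// still interested in more data

	virtual bool EndOfFile()
		{ return false; }	// "done" once the file ends

protected:
	ByteCounter(RecordVal* args, File* file)
		: file_analysis::Analyzer(args, file), n(0) {}

private:
	uint64 n;
};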

src/file_analysis/AnalyzerSet.cc (new file)
@@ -0,0 +1,195 @@
// See the file "COPYING" in the main distribution directory for copyright.
#include "AnalyzerSet.h"
#include "File.h"
#include "Analyzer.h"
#include "Extract.h"
#include "DataEvent.h"
#include "Hash.h"
using namespace file_analysis;
// keep in order w/ declared enum values in file_analysis.bif
static AnalyzerInstantiator analyzer_factory[] = {
file_analysis::Extract::Instantiate,
file_analysis::MD5::Instantiate,
file_analysis::SHA1::Instantiate,
file_analysis::SHA256::Instantiate,
file_analysis::DataEvent::Instantiate,
};
static void analyzer_del_func(void* v)
{
delete (file_analysis::Analyzer*) v;
}
AnalyzerSet::AnalyzerSet(File* arg_file) : file(arg_file)
{
TypeList* t = new TypeList();
t->Append(BifType::Record::FileAnalysis::AnalyzerArgs->Ref());
analyzer_hash = new CompositeHash(t);
Unref(t);
analyzer_map.SetDeleteFunc(analyzer_del_func);
}
AnalyzerSet::~AnalyzerSet()
{
while ( ! mod_queue.empty() )
{
Modification* mod = mod_queue.front();
mod->Abort();
delete mod;
mod_queue.pop();
}
delete analyzer_hash;
}
bool AnalyzerSet::Add(RecordVal* args)
{
HashKey* key = GetKey(args);
if ( analyzer_map.Lookup(key) )
{
DBG_LOG(DBG_FILE_ANALYSIS, "Instantiate analyzer %d skipped for file id"
" %s: already exists", file_analysis::Analyzer::ArgsTag(args),
file->GetID().c_str());
delete key;
return true;
}
file_analysis::Analyzer* a = InstantiateAnalyzer(args);
if ( ! a )
{
delete key;
return false;
}
Insert(a, key);
return true;
}
bool AnalyzerSet::QueueAdd(RecordVal* args)
{
HashKey* key = GetKey(args);
file_analysis::Analyzer* a = InstantiateAnalyzer(args);
if ( ! a )
{
delete key;
return false;
}
mod_queue.push(new AddMod(a, key));
return true;
}
bool AnalyzerSet::AddMod::Perform(AnalyzerSet* set)
{
if ( set->analyzer_map.Lookup(key) )
{
DBG_LOG(DBG_FILE_ANALYSIS, "Add analyzer %d skipped for file id"
" %s: already exists", a->Tag(), a->GetFile()->GetID().c_str());
Abort();
return true;
}
set->Insert(a, key);
return true;
}
bool AnalyzerSet::Remove(const RecordVal* args)
{
return Remove(file_analysis::Analyzer::ArgsTag(args), GetKey(args));
}
bool AnalyzerSet::Remove(FA_Tag tag, HashKey* key)
{
file_analysis::Analyzer* a =
(file_analysis::Analyzer*) analyzer_map.Remove(key);
delete key;
if ( ! a )
{
DBG_LOG(DBG_FILE_ANALYSIS, "Skip remove analyzer %d for file id %s",
tag, file->GetID().c_str());
return false;
}
DBG_LOG(DBG_FILE_ANALYSIS, "Remove analyzer %d for file id %s", a->Tag(),
file->GetID().c_str());
delete a;
return true;
}
bool AnalyzerSet::QueueRemove(const RecordVal* args)
{
HashKey* key = GetKey(args);
FA_Tag tag = file_analysis::Analyzer::ArgsTag(args);
mod_queue.push(new RemoveMod(tag, key));
return analyzer_map.Lookup(key);
}
bool AnalyzerSet::RemoveMod::Perform(AnalyzerSet* set)
{
return set->Remove(tag, key);
}
HashKey* AnalyzerSet::GetKey(const RecordVal* args) const
{
HashKey* key = analyzer_hash->ComputeHash(args, 1);
if ( ! key )
reporter->InternalError("AnalyzerArgs type mismatch");
return key;
}
file_analysis::Analyzer* AnalyzerSet::InstantiateAnalyzer(RecordVal* args) const
{
file_analysis::Analyzer* a =
analyzer_factory[file_analysis::Analyzer::ArgsTag(args)](args, file);
if ( ! a )
{
DBG_LOG(DBG_FILE_ANALYSIS, "Instantiate analyzer %d failed for file id",
" %s", file_analysis::Analyzer::ArgsTag(args),
file->GetID().c_str());
return 0;
}
return a;
}
void AnalyzerSet::Insert(file_analysis::Analyzer* a, HashKey* key)
{
DBG_LOG(DBG_FILE_ANALYSIS, "Add analyzer %d for file id %s", a->Tag(),
file->GetID().c_str());
analyzer_map.Insert(key, a);
delete key;
}
void AnalyzerSet::DrainModifications()
{
if ( mod_queue.empty() )
return;
DBG_LOG(DBG_FILE_ANALYSIS, "Start analyzer mod queue flush of file id %s",
file->GetID().c_str());
do
{
Modification* mod = mod_queue.front();
mod->Perform(this);
delete mod;
mod_queue.pop();
} while ( ! mod_queue.empty() );
DBG_LOG(DBG_FILE_ANALYSIS, "End flushing analyzer mod queue of file id %s",
file->GetID().c_str());
}

src/file_analysis/AnalyzerSet.h (new file)
@@ -0,0 +1,110 @@
// See the file "COPYING" in the main distribution directory for copyright.
#ifndef FILE_ANALYSIS_ANALYZERSET_H
#define FILE_ANALYSIS_ANALYZERSET_H
#include <queue>
#include "Analyzer.h"
#include "Dict.h"
#include "CompHash.h"
#include "Val.h"
namespace file_analysis {
class File;
declare(PDict,Analyzer);
/**
* A set of file analysis analyzers indexed by AnalyzerArgs. Allows queueing
* of additions/removals so that those modifications can happen at well-defined
* times (e.g. to make sure a loop iterator isn't invalidated).
*/
class AnalyzerSet {
public:
AnalyzerSet(File* arg_file);
~AnalyzerSet();
/**
* @return true if analyzer was instantiated/attached, else false.
*/
bool Add(RecordVal* args);
/**
* @return true if analyzer was able to be instantiated, else false.
*/
bool QueueAdd(RecordVal* args);
/**
* @return false if analyzer didn't exist and so wasn't removed, else true.
*/
bool Remove(const RecordVal* args);
/**
* @return true if analyzer exists at time of call, else false.
*/
bool QueueRemove(const RecordVal* args);
/**
* Perform all queued modifications to the currently active analyzers.
*/
void DrainModifications();
IterCookie* InitForIteration() const
{ return analyzer_map.InitForIteration(); }
file_analysis::Analyzer* NextEntry(IterCookie* c)
{ return analyzer_map.NextEntry(c); }
protected:
HashKey* GetKey(const RecordVal* args) const;
file_analysis::Analyzer* InstantiateAnalyzer(RecordVal* args) const;
void Insert(file_analysis::Analyzer* a, HashKey* key);
bool Remove(FA_Tag tag, HashKey* key);
private:
File* file;
CompositeHash* analyzer_hash; /**< AnalyzerArgs hashes. */
PDict(file_analysis::Analyzer) analyzer_map; /**< Indexed by AnalyzerArgs. */
class Modification {
public:
virtual ~Modification() {}
virtual bool Perform(AnalyzerSet* set) = 0;
virtual void Abort() = 0;
};
class AddMod : public Modification {
public:
AddMod(file_analysis::Analyzer* arg_a, HashKey* arg_key)
: Modification(), a(arg_a), key(arg_key) {}
virtual ~AddMod() {}
virtual bool Perform(AnalyzerSet* set);
virtual void Abort() { delete a; delete key; }
protected:
file_analysis::Analyzer* a;
HashKey* key;
};
class RemoveMod : public Modification {
public:
RemoveMod(FA_Tag arg_tag, HashKey* arg_key)
: Modification(), tag(arg_tag), key(arg_key) {}
virtual ~RemoveMod() {}
virtual bool Perform(AnalyzerSet* set);
virtual void Abort() { delete key; }
protected:
FA_Tag tag;
HashKey* key;
};
typedef queue<Modification*> ModQueue;
ModQueue mod_queue;
};
} // namespace file_analysis
#endif
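
To make the queueing semantics concrete, here is a short usage sketch that closely mirrors what File::DataIn above already does: removals requested during iteration only take effect once DrainModifications() runs, so the iteration cookie is never invalidated.

// Usage sketch (mirrors File::DataIn): analyzers that report "done" are
// queued for removal while iterating and only actually removed afterwards.
file_analysis::Analyzer* a = 0;
IterCookie* c = analyzers.InitForIteration();

while ( (a = analyzers.NextEntry(c)) )
	{
	if ( ! a->DeliverStream(data, len) )
		analyzers.QueueRemove(a->Args());	// queued, not applied yet
	}

analyzers.DrainModifications();	// queued removals happen here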

src/file_analysis/DataEvent.cc (new file)
@@ -0,0 +1,67 @@
// See the file "COPYING" in the main distribution directory for copyright.
#include <string>
#include "DataEvent.h"
#include "EventRegistry.h"
#include "Event.h"
#include "util.h"
using namespace file_analysis;
DataEvent::DataEvent(RecordVal* args, File* file,
EventHandlerPtr ce, EventHandlerPtr se)
: file_analysis::Analyzer(args, file), chunk_event(ce), stream_event(se)
{
}
file_analysis::Analyzer* DataEvent::Instantiate(RecordVal* args, File* file)
{
using BifType::Record::FileAnalysis::AnalyzerArgs;
int chunk_off = AnalyzerArgs->FieldOffset("chunk_event");
int stream_off = AnalyzerArgs->FieldOffset("stream_event");
Val* chunk_val = args->Lookup(chunk_off);
Val* stream_val = args->Lookup(stream_off);
if ( ! chunk_val && ! stream_val ) return 0;
EventHandlerPtr chunk;
EventHandlerPtr stream;
if ( chunk_val )
chunk = event_registry->Lookup(chunk_val->AsFunc()->Name());
if ( stream_val )
stream = event_registry->Lookup(stream_val->AsFunc()->Name());
return new DataEvent(args, file, chunk, stream);
}
bool DataEvent::DeliverChunk(const u_char* data, uint64 len, uint64 offset)
{
if ( ! chunk_event ) return true;
val_list* args = new val_list;
args->append(GetFile()->GetVal()->Ref());
args->append(new StringVal(new BroString(data, len, 0)));
args->append(new Val(offset, TYPE_COUNT));
mgr.QueueEvent(chunk_event, args);
return true;
}
bool DataEvent::DeliverStream(const u_char* data, uint64 len)
{
if ( ! stream_event ) return true;
val_list* args = new val_list;
args->append(GetFile()->GetVal()->Ref());
args->append(new StringVal(new BroString(data, len, 0)));
mgr.QueueEvent(stream_event, args);
return true;
}

src/file_analysis/DataEvent.h (new file)
@@ -0,0 +1,36 @@
// See the file "COPYING" in the main distribution directory for copyright.
#ifndef FILE_ANALYSIS_DATAEVENT_H
#define FILE_ANALYSIS_DATAEVENT_H
#include <string>
#include "Val.h"
#include "File.h"
#include "Analyzer.h"
namespace file_analysis {
/**
* An analyzer to send file data to script-layer events.
*/
class DataEvent : public file_analysis::Analyzer {
public:
virtual bool DeliverChunk(const u_char* data, uint64 len, uint64 offset);
virtual bool DeliverStream(const u_char* data, uint64 len);
static file_analysis::Analyzer* Instantiate(RecordVal* args, File* file);
protected:
DataEvent(RecordVal* args, File* file,
EventHandlerPtr ce, EventHandlerPtr se);
private:
EventHandlerPtr chunk_event;
EventHandlerPtr stream_event;
};
} // namespace file_analysis
#endif

src/file_analysis/Extract.cc (new file)
@@ -0,0 +1,48 @@
// See the file "COPYING" in the main distribution directory for copyright.
#include <string>
#include <fcntl.h>
#include <cerrno>
#include <cstring>
#include "Extract.h"
#include "util.h"
using namespace file_analysis;
Extract::Extract(RecordVal* args, File* file, const string& arg_filename)
: file_analysis::Analyzer(args, file), filename(arg_filename)
{
fd = open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0666);
if ( fd < 0 )
{
fd = 0;
char buf[128];
strerror_r(errno, buf, sizeof(buf));
reporter->Error("cannot open %s: %s", filename.c_str(), buf);
}
}
Extract::~Extract()
{
if ( fd )
safe_close(fd);
}
file_analysis::Analyzer* Extract::Instantiate(RecordVal* args, File* file)
{
using BifType::Record::FileAnalysis::AnalyzerArgs;
Val* v = args->Lookup(AnalyzerArgs->FieldOffset("extract_filename"));
if ( ! v )
return 0;
return new Extract(args, file, v->AsString()->CheckString());
}
bool Extract::DeliverChunk(const u_char* data, uint64 len, uint64 offset)
{
if ( ! fd )
return false;
safe_pwrite(fd, data, len, offset);
return true;
}

src/file_analysis/Extract.h (new file)
@@ -0,0 +1,35 @@
// See the file "COPYING" in the main distribution directory for copyright.
#ifndef FILE_ANALYSIS_EXTRACT_H
#define FILE_ANALYSIS_EXTRACT_H
#include <string>
#include "Val.h"
#include "File.h"
#include "Analyzer.h"
namespace file_analysis {
/**
* An analyzer to extract files to disk.
*/
class Extract : public file_analysis::Analyzer {
public:
virtual ~Extract();
virtual bool DeliverChunk(const u_char* data, uint64 len, uint64 offset);
static file_analysis::Analyzer* Instantiate(RecordVal* args, File* file);
protected:
Extract(RecordVal* args, File* file, const string& arg_filename);
private:
string filename;
int fd;
};
} // namespace file_analysis
#endif

src/file_analysis/File.cc (new file, 451 lines)
@@ -0,0 +1,451 @@
// See the file "COPYING" in the main distribution directory for copyright.
#include <string>
#include <openssl/md5.h>
#include "File.h"
#include "FileTimer.h"
#include "FileID.h"
#include "Analyzer.h"
#include "Manager.h"
#include "Reporter.h"
#include "Val.h"
#include "Type.h"
#include "Event.h"
#include "analyzer/Analyzer.h"
#include "analyzer/Manager.h"
using namespace file_analysis;
static Val* empty_connection_table()
{
TypeList* tbl_index = new TypeList(conn_id);
tbl_index->Append(conn_id->Ref());
TableType* tbl_type = new TableType(tbl_index, connection_type->Ref());
Val* rval = new TableVal(tbl_type);
Unref(tbl_type);
return rval;
}
static RecordVal* get_conn_id_val(const Connection* conn)
{
RecordVal* v = new RecordVal(conn_id);
v->Assign(0, new AddrVal(conn->OrigAddr()));
v->Assign(1, new PortVal(ntohs(conn->OrigPort()), conn->ConnTransport()));
v->Assign(2, new AddrVal(conn->RespAddr()));
v->Assign(3, new PortVal(ntohs(conn->RespPort()), conn->ConnTransport()));
return v;
}
int File::id_idx = -1;
int File::parent_id_idx = -1;
int File::source_idx = -1;
int File::is_orig_idx = -1;
int File::conns_idx = -1;
int File::last_active_idx = -1;
int File::seen_bytes_idx = -1;
int File::total_bytes_idx = -1;
int File::missing_bytes_idx = -1;
int File::overflow_bytes_idx = -1;
int File::timeout_interval_idx = -1;
int File::bof_buffer_size_idx = -1;
int File::bof_buffer_idx = -1;
int File::mime_type_idx = -1;
string File::salt;
void File::StaticInit()
{
if ( id_idx != -1 )
return;
id_idx = Idx("id");
parent_id_idx = Idx("parent_id");
source_idx = Idx("source");
is_orig_idx = Idx("is_orig");
conns_idx = Idx("conns");
last_active_idx = Idx("last_active");
seen_bytes_idx = Idx("seen_bytes");
total_bytes_idx = Idx("total_bytes");
missing_bytes_idx = Idx("missing_bytes");
overflow_bytes_idx = Idx("overflow_bytes");
timeout_interval_idx = Idx("timeout_interval");
bof_buffer_size_idx = Idx("bof_buffer_size");
bof_buffer_idx = Idx("bof_buffer");
mime_type_idx = Idx("mime_type");
salt = BifConst::FileAnalysis::salt->CheckString();
}
File::File(const string& unique, Connection* conn, analyzer::Tag tag,
bool is_orig)
: id(""), unique(unique), val(0), postpone_timeout(false),
first_chunk(true), missed_bof(false), need_reassembly(false), done(false),
analyzers(this)
{
StaticInit();
char tmp[20];
uint64 hash[2];
string msg(unique + salt);
MD5(reinterpret_cast<const u_char*>(msg.data()), msg.size(),
reinterpret_cast<u_char*>(hash));
uitoa_n(hash[0], tmp, sizeof(tmp), 62);
DBG_LOG(DBG_FILE_ANALYSIS, "Creating new File object %s (%s)", tmp,
unique.c_str());
val = new RecordVal(fa_file_type);
val->Assign(id_idx, new StringVal(tmp));
id = FileID(tmp);
if ( conn )
{
// add source, connection, is_orig fields
val->Assign(source_idx, new StringVal(analyzer_mgr->GetAnalyzerName(tag)));
val->Assign(is_orig_idx, new Val(is_orig, TYPE_BOOL));
UpdateConnectionFields(conn);
}
else
// use the unique file handle as source
val->Assign(source_idx, new StringVal(unique.c_str()));
UpdateLastActivityTime();
}
File::~File()
{
DBG_LOG(DBG_FILE_ANALYSIS, "Destroying File object %s", id.c_str());
Unref(val);
}
void File::UpdateLastActivityTime()
{
val->Assign(last_active_idx, new Val(network_time, TYPE_TIME));
}
double File::GetLastActivityTime() const
{
return val->Lookup(last_active_idx)->AsTime();
}
void File::UpdateConnectionFields(Connection* conn)
{
if ( ! conn )
return;
Val* conns = val->Lookup(conns_idx);
bool is_first = false;
if ( ! conns )
{
is_first = true;
conns = empty_connection_table();
val->Assign(conns_idx, conns);
}
Val* idx = get_conn_id_val(conn);
if ( ! conns->AsTableVal()->Lookup(idx) )
{
Val* conn_val = conn->BuildConnVal();
conns->AsTableVal()->Assign(idx, conn_val);
if ( ! is_first && FileEventAvailable(file_over_new_connection) )
{
val_list* vl = new val_list();
vl->append(val->Ref());
vl->append(conn_val->Ref());
FileEvent(file_over_new_connection, vl);
}
}
Unref(idx);
}
uint64 File::LookupFieldDefaultCount(int idx) const
{
Val* v = val->LookupWithDefault(idx);
uint64 rval = v->AsCount();
Unref(v);
return rval;
}
double File::LookupFieldDefaultInterval(int idx) const
{
Val* v = val->LookupWithDefault(idx);
double rval = v->AsInterval();
Unref(v);
return rval;
}
int File::Idx(const string& field)
{
int rval = fa_file_type->FieldOffset(field.c_str());
if ( rval < 0 )
reporter->InternalError("Unknown fa_file field: %s", field.c_str());
return rval;
}
double File::GetTimeoutInterval() const
{
return LookupFieldDefaultInterval(timeout_interval_idx);
}
void File::SetTimeoutInterval(double interval)
{
val->Assign(timeout_interval_idx, new Val(interval, TYPE_INTERVAL));
}
void File::IncrementByteCount(uint64 size, int field_idx)
{
uint64 old = LookupFieldDefaultCount(field_idx);
val->Assign(field_idx, new Val(old + size, TYPE_COUNT));
}
void File::SetTotalBytes(uint64 size)
{
val->Assign(total_bytes_idx, new Val(size, TYPE_COUNT));
}
bool File::IsComplete() const
{
Val* total = val->Lookup(total_bytes_idx);
if ( ! total )
return false;
if ( LookupFieldDefaultCount(seen_bytes_idx) >= total->AsCount() )
return true;
return false;
}
void File::ScheduleInactivityTimer() const
{
timer_mgr->Add(new FileTimer(network_time, id, GetTimeoutInterval()));
}
bool File::AddAnalyzer(RecordVal* args)
{
return done ? false : analyzers.QueueAdd(args);
}
bool File::RemoveAnalyzer(const RecordVal* args)
{
return done ? false : analyzers.QueueRemove(args);
}
bool File::BufferBOF(const u_char* data, uint64 len)
{
if ( bof_buffer.full || bof_buffer.replayed )
return false;
uint64 desired_size = LookupFieldDefaultCount(bof_buffer_size_idx);
bof_buffer.chunks.push_back(new BroString(data, len, 0));
bof_buffer.size += len;
if ( bof_buffer.size >= desired_size )
{
bof_buffer.full = true;
ReplayBOF();
}
return true;
}
bool File::DetectMIME(const u_char* data, uint64 len)
{
const char* mime = bro_magic_buffer(magic_mime_cookie, data, len);
if ( mime )
{
const char* mime_end = strchr(mime, ';');
if ( mime_end )
// strip off charset
val->Assign(mime_type_idx, new StringVal(mime_end - mime, mime));
else
val->Assign(mime_type_idx, new StringVal(mime));
}
return mime;
}
void File::ReplayBOF()
{
if ( bof_buffer.replayed )
return;
bof_buffer.replayed = true;
if ( bof_buffer.chunks.empty() )
{
// Since we missed the beginning, try file type detect on next data in.
missed_bof = true;
return;
}
BroString* bs = concatenate(bof_buffer.chunks);
val->Assign(bof_buffer_idx, new StringVal(bs));
DetectMIME(bs->Bytes(), bs->Len());
FileEvent(file_new);
for ( size_t i = 0; i < bof_buffer.chunks.size(); ++i )
DataIn(bof_buffer.chunks[i]->Bytes(), bof_buffer.chunks[i]->Len());
}
void File::DataIn(const u_char* data, uint64 len, uint64 offset)
{
analyzers.DrainModifications();
if ( first_chunk )
{
// TODO: this should all really be delayed until we attempt reassembly
DetectMIME(data, len);
FileEvent(file_new);
first_chunk = false;
}
file_analysis::Analyzer* a = 0;
IterCookie* c = analyzers.InitForIteration();
while ( (a = analyzers.NextEntry(c)) )
{
if ( ! a->DeliverChunk(data, len, offset) )
analyzers.QueueRemove(a->Args());
}
analyzers.DrainModifications();
// TODO: check reassembly requirement based on buffer size in record
if ( need_reassembly )
reporter->InternalError("file_analyzer::File TODO: reassembly not yet supported");
// TODO: reassembly overflow stuff, increment overflow count, eval trigger
IncrementByteCount(len, seen_bytes_idx);
}
void File::DataIn(const u_char* data, uint64 len)
{
analyzers.DrainModifications();
if ( BufferBOF(data, len) )
return;
if ( missed_bof )
{
DetectMIME(data, len);
FileEvent(file_new);
missed_bof = false;
}
file_analysis::Analyzer* a = 0;
IterCookie* c = analyzers.InitForIteration();
while ( (a = analyzers.NextEntry(c)) )
{
if ( ! a->DeliverStream(data, len) )
{
analyzers.QueueRemove(a->Args());
continue;
}
uint64 offset = LookupFieldDefaultCount(seen_bytes_idx) +
LookupFieldDefaultCount(missing_bytes_idx);
if ( ! a->DeliverChunk(data, len, offset) )
analyzers.QueueRemove(a->Args());
}
analyzers.DrainModifications();
IncrementByteCount(len, seen_bytes_idx);
}
void File::EndOfFile()
{
if ( done )
return;
analyzers.DrainModifications();
// Send along anything that's been buffered, but never flushed.
ReplayBOF();
done = true;
file_analysis::Analyzer* a = 0;
IterCookie* c = analyzers.InitForIteration();
while ( (a = analyzers.NextEntry(c)) )
{
if ( ! a->EndOfFile() )
analyzers.QueueRemove(a->Args());
}
FileEvent(file_state_remove);
analyzers.DrainModifications();
}
void File::Gap(uint64 offset, uint64 len)
{
analyzers.DrainModifications();
// If we were buffering the beginning of the file, a gap means we've got
// as much contiguous stuff at the beginning as possible, so work with that.
ReplayBOF();
file_analysis::Analyzer* a = 0;
IterCookie* c = analyzers.InitForIteration();
while ( (a = analyzers.NextEntry(c)) )
{
if ( ! a->Undelivered(offset, len) )
analyzers.QueueRemove(a->Args());
}
if ( FileEventAvailable(file_gap) )
{
val_list* vl = new val_list();
vl->append(val->Ref());
vl->append(new Val(offset, TYPE_COUNT));
vl->append(new Val(len, TYPE_COUNT));
FileEvent(file_gap, vl);
}
analyzers.DrainModifications();
IncrementByteCount(len, missing_bytes_idx);
}
bool File::FileEventAvailable(EventHandlerPtr h)
{
return h && ! file_mgr->IsIgnored(unique);
}
void File::FileEvent(EventHandlerPtr h)
{
if ( ! FileEventAvailable(h) )
return;
val_list* vl = new val_list();
vl->append(val->Ref());
FileEvent(h, vl);
}
void File::FileEvent(EventHandlerPtr h, val_list* vl)
{
mgr.QueueEvent(h, vl);
if ( h == file_new || h == file_timeout )
{
// immediate feedback is required for these events.
mgr.Drain();
analyzers.DrainModifications();
}
}

src/file_analysis/File.h (new file, 230 lines)
@@ -0,0 +1,230 @@
// See the file "COPYING" in the main distribution directory for copyright.
#ifndef FILE_ANALYSIS_FILE_H
#define FILE_ANALYSIS_FILE_H
#include <string>
#include <vector>
#include "AnalyzerTags.h"
#include "Conn.h"
#include "Val.h"
#include "AnalyzerSet.h"
#include "FileID.h"
#include "BroString.h"
namespace file_analysis {
/**
* Wrapper class around \c fa_file record values from script layer.
*/
class File {
public:
~File();
/**
* @return the #val record.
*/
RecordVal* GetVal() const { return val; }
/**
* @return value (seconds) of the "timeout_interval" field from #val record.
*/
double GetTimeoutInterval() const;
/**
* Set the "timeout_interval" field from #val record to \a interval seconds.
*/
void SetTimeoutInterval(double interval);
/**
* @return value of the "id" field from #val record.
*/
FileID GetID() const { return id; }
/**
* @return the string which uniquely identifies the file.
*/
string GetUnique() const { return unique; }
/**
* @return value of "last_active" field in #val record;
*/
double GetLastActivityTime() const;
/**
* Refreshes "last_active" field of #val record with current network time.
*/
void UpdateLastActivityTime();
/**
* Set "total_bytes" field of #val record to \a size.
*/
void SetTotalBytes(uint64 size);
/**
* Compares "seen_bytes" field to "total_bytes" field of #val record
* and returns true if the comparison indicates the full file was seen.
* If "total_bytes" hasn't been set yet, it returns false.
*/
bool IsComplete() const;
/**
* Create a timer to be dispatched after the amount of time indicated by
* the "timeout_interval" field of the #val record in order to check if
* "last_active" field is old enough to timeout analysis of the file.
*/
void ScheduleInactivityTimer() const;
/**
* Queues attaching an analyzer. Only one analyzer per type can be attached
* at a time unless the arguments differ.
* @return false if analyzer can't be instantiated, else true.
*/
bool AddAnalyzer(RecordVal* args);
/**
* Queues removal of an analyzer.
* @return true if analyzer was active at time of call, else false.
*/
bool RemoveAnalyzer(const RecordVal* args);
/**
* Pass in non-sequential data and deliver to attached analyzers.
*/
void DataIn(const u_char* data, uint64 len, uint64 offset);
/**
* Pass in sequential data and deliver to attached analyzers.
*/
void DataIn(const u_char* data, uint64 len);
/**
* Inform attached analyzers about end of file being seen.
*/
void EndOfFile();
/**
* Inform attached analyzers about a gap in file stream.
*/
void Gap(uint64 offset, uint64 len);
/**
* @return true if event has a handler and the file isn't ignored.
*/
bool FileEventAvailable(EventHandlerPtr h);
/**
* Raises an event related to the file's life-cycle; the only parameter
* to that event is the \c fa_file record.
*/
void FileEvent(EventHandlerPtr h);
/**
* Raises an event related to the file's life-cycle.
*/
void FileEvent(EventHandlerPtr h, val_list* vl);
protected:
friend class Manager;
/**
* Constructor; only file_analysis::Manager should be creating these.
*/
File(const string& unique, Connection* conn = 0,
analyzer::Tag tag = AnalyzerTag::Error, bool is_orig = false);
/**
* Updates the "conn_ids" and "conn_uids" fields in #val record with the
* \c conn_id and UID taken from \a conn.
*/
void UpdateConnectionFields(Connection* conn);
/**
* Increment a byte count field of #val record by \a size.
*/
void IncrementByteCount(uint64 size, int field_idx);
/**
* Wrapper to RecordVal::LookupWithDefault for the field in #val at index
* \a idx which automatically unrefs the Val and returns a converted value.
*/
uint64 LookupFieldDefaultCount(int idx) const;
/**
* Wrapper to RecordVal::LookupWithDefault for the field in #val at index
* \a idx which automatically unrefs the Val and returns a converted value.
*/
double LookupFieldDefaultInterval(int idx) const;
/**
* Buffers incoming data at the beginning of a file.
* @return true if buffering is still required, else false.
*/
bool BufferBOF(const u_char* data, uint64 len);
/**
* Forward any beginning-of-file buffered data on to DataIn stream.
*/
void ReplayBOF();
/**
* Does mime type detection and assigns type (if available) to \c mime_type
* field in #val.
* @return whether mime type was available.
*/
bool DetectMIME(const u_char* data, uint64 len);
/**
* @return the field offset in #val record corresponding to \a field_name.
*/
static int Idx(const string& field_name);
/**
* Initializes static member.
*/
static void StaticInit();
private:
FileID id; /**< A pretty hash that likely identifies the file. */
string unique; /**< A string that uniquely identifies the file. */
RecordVal* val; /**< \c fa_file from script layer. */
bool postpone_timeout; /**< Whether postponing timeout is requested. */
bool first_chunk; /**< Track first non-linear chunk. */
bool missed_bof; /**< Flags that we missed start of file. */
bool need_reassembly; /**< Whether file stream reassembly is needed. */
bool done; /**< If this object is about to be deleted. */
AnalyzerSet analyzers;
struct BOF_Buffer {
BOF_Buffer() : full(false), replayed(false), size(0) {}
~BOF_Buffer()
{ for ( size_t i = 0; i < chunks.size(); ++i ) delete chunks[i]; }
bool full;
bool replayed;
uint64 size;
BroString::CVec chunks;
} bof_buffer; /**< Beginning of file buffer. */
static string salt;
static int id_idx;
static int parent_id_idx;
static int source_idx;
static int is_orig_idx;
static int conns_idx;
static int last_active_idx;
static int seen_bytes_idx;
static int total_bytes_idx;
static int missing_bytes_idx;
static int overflow_bytes_idx;
static int timeout_interval_idx;
static int bof_buffer_size_idx;
static int bof_buffer_idx;
static int mime_type_idx;
};
} // namespace file_analysis
#endif

src/file_analysis/FileID.h (new file)
@@ -0,0 +1,34 @@
// See the file "COPYING" in the main distribution directory for copyright.
#ifndef FILE_ANALYSIS_FILEID_H
#define FILE_ANALYSIS_FILEID_H
namespace file_analysis {
/**
* A simple string wrapper class to help enforce some type safety between
* methods of file_analysis::Manager, some of which use a unique string to
* identify files, and others which use a pretty hash (the FileID) to identify
* files. A FileID is primarily used in methods which interface with the
* script-layer, while the unique strings are used for methods which interface
* with protocol analyzers or anything that sends data to the file analysis
* framework.
*/
struct FileID {
string id;
explicit FileID(const string arg_id) : id(arg_id) {}
FileID(const FileID& other) : id(other.id) {}
const char* c_str() const { return id.c_str(); }
bool operator==(const FileID& rhs) const { return id == rhs.id; }
bool operator<(const FileID& rhs) const { return id < rhs.id; }
FileID& operator=(const FileID& rhs) { id = rhs.id; return *this; }
FileID& operator=(const string& rhs) { id = rhs; return *this; }
};
} // namespace file_analysis
#endif
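
A small illustration of the type-safety point (the identifier value is made up): the public Manager methods that operate on an already-known file take a FileID, and because the FileID constructor from a string is explicit, passing a raw handle string where a FileID is expected does not compile.

// Illustration only; the id value is a made-up pretty hash.
FileID fid("Fn2dK3l1Qoln");
file_mgr->SetTimeoutInterval(fid, 60.0);	// ok: expects a FileID
// file_mgr->SetTimeoutInterval("HTTP-1234", 60.0);	// error: no implicit FileID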

src/file_analysis/FileTimer.cc (new file)
@@ -0,0 +1,40 @@
// See the file "COPYING" in the main distribution directory for copyright.
#include "Manager.h"
#include "File.h"
using namespace file_analysis;
FileTimer::FileTimer(double t, const FileID& id, double interval)
: Timer(t + interval, TIMER_FILE_ANALYSIS_INACTIVITY), file_id(id)
{
DBG_LOG(DBG_FILE_ANALYSIS, "New %f second timeout timer for %s",
file_id.c_str(), interval);
}
void FileTimer::Dispatch(double t, int is_expire)
{
File* file = file_mgr->Lookup(file_id);
if ( ! file )
return;
double last_active = file->GetLastActivityTime();
double inactive_time = t > last_active ? t - last_active : 0.0;
DBG_LOG(DBG_FILE_ANALYSIS, "Checking inactivity for %s, last active at %f, "
"inactive for %f", file_id.c_str(), last_active, inactive_time);
if ( last_active == 0.0 )
{
// was created when network_time was zero, so re-schedule w/ valid time
file->UpdateLastActivityTime();
file->ScheduleInactivityTimer();
return;
}
if ( inactive_time >= file->GetTimeoutInterval() )
file_mgr->Timeout(file_id);
else if ( ! is_expire )
file->ScheduleInactivityTimer();
}

src/file_analysis/FileTimer.h (new file)
@@ -0,0 +1,31 @@
// See the file "COPYING" in the main distribution directory for copyright.
#ifndef FILE_ANALYSIS_FILETIMER_H
#define FILE_ANALYSIS_FILETIMER_H
#include <string>
#include "Timer.h"
#include "FileID.h"
namespace file_analysis {
/**
* Timer to periodically check if file analysis for a given file is inactive.
*/
class FileTimer : public Timer {
public:
FileTimer(double t, const FileID& id, double interval);
/**
* Check inactivity of file_analysis::File corresponding to #file_id,
* reschedule if active, else call file_analysis::Manager::Timeout.
*/
void Dispatch(double t, int is_expire);
private:
FileID file_id;
};
} // namespace file_analysis
#endif

src/file_analysis/Hash.cc (new file, 56 lines)
@@ -0,0 +1,56 @@
// See the file "COPYING" in the main distribution directory for copyright.
#include <string>
#include "Hash.h"
#include "util.h"
#include "Event.h"
using namespace file_analysis;
Hash::Hash(RecordVal* args, File* file, HashVal* hv, const char* arg_kind)
: file_analysis::Analyzer(args, file), hash(hv), fed(false), kind(arg_kind)
{
hash->Init();
}
Hash::~Hash()
{
Unref(hash);
}
bool Hash::DeliverStream(const u_char* data, uint64 len)
{
if ( ! hash->IsValid() )
return false;
if ( ! fed )
fed = len > 0;
hash->Feed(data, len);
return true;
}
bool Hash::EndOfFile()
{
Finalize();
return false;
}
bool Hash::Undelivered(uint64 offset, uint64 len)
{
return false;
}
void Hash::Finalize()
{
if ( ! hash->IsValid() || ! fed )
return;
val_list* vl = new val_list();
vl->append(GetFile()->GetVal()->Ref());
vl->append(new StringVal(kind));
vl->append(hash->Get());
mgr.QueueEvent(file_hash, vl);
}

src/file_analysis/Hash.h (new file, 74 lines)
@@ -0,0 +1,74 @@
// See the file "COPYING" in the main distribution directory for copyright.
#ifndef FILE_ANALYSIS_HASH_H
#define FILE_ANALYSIS_HASH_H
#include <string>
#include "Val.h"
#include "OpaqueVal.h"
#include "File.h"
#include "Analyzer.h"
namespace file_analysis {
/**
* An analyzer to produce a hash of file contents.
*/
class Hash : public file_analysis::Analyzer {
public:
virtual ~Hash();
virtual bool DeliverStream(const u_char* data, uint64 len);
virtual bool EndOfFile();
virtual bool Undelivered(uint64 offset, uint64 len);
protected:
Hash(RecordVal* args, File* file, HashVal* hv, const char* kind);
void Finalize();
private:
HashVal* hash;
bool fed;
const char* kind;
};
class MD5 : public Hash {
public:
static file_analysis::Analyzer* Instantiate(RecordVal* args, File* file)
{ return file_hash ? new MD5(args, file) : 0; }
protected:
MD5(RecordVal* args, File* file)
: Hash(args, file, new MD5Val(), "md5")
{}
};
class SHA1 : public Hash {
public:
static file_analysis::Analyzer* Instantiate(RecordVal* args, File* file)
{ return file_hash ? new SHA1(args, file) : 0; }
protected:
SHA1(RecordVal* args, File* file)
: Hash(args, file, new SHA1Val(), "sha1")
{}
};
class SHA256 : public Hash {
public:
static file_analysis::Analyzer* Instantiate(RecordVal* args, File* file)
{ return file_hash ? new SHA256(args, file) : 0; }
protected:
SHA256(RecordVal* args, File* file)
: Hash(args, file, new SHA256Val(), "sha256")
{}
};
} // namespace file_analysis
#endif
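
Adding another digest would follow the same pattern as the three subclasses above. A hypothetical sketch (not part of this commit): besides the subclass itself, a new enum value in file_analysis.bif and a matching, order-preserving entry in the analyzer_factory table in AnalyzerSet.cc would be needed, and SHA224Val is assumed to exist analogously to the other HashVal types.

// Hypothetical sketch only; SHA224Val and the corresponding enum/factory
// entries are assumptions, not part of this commit.
class SHA224 : public Hash {
public:
	static file_analysis::Analyzer* Instantiate(RecordVal* args, File* file)
		{ return file_hash ? new SHA224(args, file) : 0; }

protected:
	SHA224(RecordVal* args, File* file)
		: Hash(args, file, new SHA224Val(), "sha224")
		{}
};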

src/file_analysis/Manager.cc (new file)
@@ -0,0 +1,359 @@
// See the file "COPYING" in the main distribution directory for copyright.
#include <vector>
#include <string>
#include "Manager.h"
#include "File.h"
#include "Analyzer.h"
#include "Var.h"
#include "Event.h"
using namespace file_analysis;
TableVal* Manager::disabled = 0;
Manager::Manager()
{
}
Manager::~Manager()
{
Terminate();
}
void Manager::Terminate()
{
vector<FileID> keys;
for ( IDMap::iterator it = id_map.begin(); it != id_map.end(); ++it )
keys.push_back(it->first);
for ( size_t i = 0; i < keys.size(); ++i )
Timeout(keys[i], true);
}
void Manager::SetHandle(const string& handle)
{
current_handle = handle;
}
void Manager::DataIn(const u_char* data, uint64 len, uint64 offset,
analyzer::Tag tag, Connection* conn, bool is_orig)
{
if ( IsDisabled(tag) )
return;
GetFileHandle(tag, conn, is_orig);
DataIn(data, len, offset, GetFile(current_handle, conn, tag, is_orig));
}
void Manager::DataIn(const u_char* data, uint64 len, uint64 offset,
const string& unique)
{
DataIn(data, len, offset, GetFile(unique));
}
void Manager::DataIn(const u_char* data, uint64 len, uint64 offset,
File* file)
{
if ( ! file )
return;
file->DataIn(data, len, offset);
if ( file->IsComplete() )
RemoveFile(file->GetUnique());
}
void Manager::DataIn(const u_char* data, uint64 len, analyzer::Tag tag,
Connection* conn, bool is_orig)
{
if ( IsDisabled(tag) )
return;
GetFileHandle(tag, conn, is_orig);
// Sequential data input shouldn't be going over multiple conns, so don't
// do the check to update connection set.
DataIn(data, len, GetFile(current_handle, conn, tag, is_orig, false));
}
void Manager::DataIn(const u_char* data, uint64 len, const string& unique)
{
DataIn(data, len, GetFile(unique));
}
void Manager::DataIn(const u_char* data, uint64 len, File* file)
{
if ( ! file )
return;
file->DataIn(data, len);
if ( file->IsComplete() )
RemoveFile(file->GetUnique());
}
void Manager::EndOfFile(analyzer::Tag tag, Connection* conn)
{
EndOfFile(tag, conn, true);
EndOfFile(tag, conn, false);
}
void Manager::EndOfFile(analyzer::Tag tag, Connection* conn, bool is_orig)
{
if ( IsDisabled(tag) )
return;
GetFileHandle(tag, conn, is_orig);
EndOfFile(current_handle);
}
void Manager::EndOfFile(const string& unique)
{
RemoveFile(unique);
}
void Manager::Gap(uint64 offset, uint64 len, analyzer::Tag tag,
Connection* conn, bool is_orig)
{
if ( IsDisabled(tag) )
return;
GetFileHandle(tag, conn, is_orig);
Gap(offset, len, GetFile(current_handle, conn, tag, is_orig));
}
void Manager::Gap(uint64 offset, uint64 len, const string& unique)
{
Gap(offset, len, GetFile(unique));
}
void Manager::Gap(uint64 offset, uint64 len, File* file)
{
if ( ! file )
return;
file->Gap(offset, len);
}
void Manager::SetSize(uint64 size, analyzer::Tag tag, Connection* conn,
bool is_orig)
{
if ( IsDisabled(tag) )
return;
GetFileHandle(tag, conn, is_orig);
SetSize(size, GetFile(current_handle, conn, tag, is_orig));
}
void Manager::SetSize(uint64 size, const string& unique)
{
SetSize(size, GetFile(unique));
}
void Manager::SetSize(uint64 size, File* file)
{
if ( ! file )
return;
file->SetTotalBytes(size);
if ( file->IsComplete() )
RemoveFile(file->GetUnique());
}
bool Manager::PostponeTimeout(const FileID& file_id) const
{
File* file = Lookup(file_id);
if ( ! file )
return false;
file->postpone_timeout = true;
return true;
}
bool Manager::SetTimeoutInterval(const FileID& file_id, double interval) const
{
File* file = Lookup(file_id);
if ( ! file )
return false;
file->SetTimeoutInterval(interval);
return true;
}
bool Manager::AddAnalyzer(const FileID& file_id, RecordVal* args) const
{
File* file = Lookup(file_id);
if ( ! file )
return false;
return file->AddAnalyzer(args);
}
bool Manager::RemoveAnalyzer(const FileID& file_id, const RecordVal* args) const
{
File* file = Lookup(file_id);
if ( ! file )
return false;
return file->RemoveAnalyzer(args);
}
File* Manager::GetFile(const string& unique, Connection* conn,
analyzer::Tag tag, bool is_orig, bool update_conn)
{
if ( unique.empty() )
return 0;
if ( IsIgnored(unique) )
return 0;
File* rval = str_map[unique];
if ( ! rval )
{
rval = str_map[unique] = new File(unique, conn, tag, is_orig);
FileID id = rval->GetID();
if ( id_map[id] )
{
reporter->Error("Evicted duplicate file ID: %s", id.c_str());
RemoveFile(unique);
}
id_map[id] = rval;
rval->ScheduleInactivityTimer();
if ( IsIgnored(unique) )
return 0;
}
else
{
rval->UpdateLastActivityTime();
if ( update_conn )
rval->UpdateConnectionFields(conn);
}
return rval;
}
File* Manager::Lookup(const FileID& file_id) const
{
IDMap::const_iterator it = id_map.find(file_id);
if ( it == id_map.end() )
return 0;
return it->second;
}
void Manager::Timeout(const FileID& file_id, bool is_terminating)
{
File* file = Lookup(file_id);
if ( ! file )
return;
file->postpone_timeout = false;
file->FileEvent(file_timeout);
if ( file->postpone_timeout && ! is_terminating )
{
DBG_LOG(DBG_FILE_ANALYSIS, "Postpone file analysis timeout for %s",
file->GetID().c_str());
file->UpdateLastActivityTime();
file->ScheduleInactivityTimer();
return;
}
DBG_LOG(DBG_FILE_ANALYSIS, "File analysis timeout for %s",
file->GetID().c_str());
RemoveFile(file->GetUnique());
}
bool Manager::IgnoreFile(const FileID& file_id)
{
IDMap::iterator it = id_map.find(file_id);
if ( it == id_map.end() )
return false;
DBG_LOG(DBG_FILE_ANALYSIS, "Ignore FileID %s", file_id.c_str());
ignored.insert(it->second->GetUnique());
return true;
}
bool Manager::RemoveFile(const string& unique)
{
StrMap::iterator it = str_map.find(unique);
if ( it == str_map.end() )
return false;
it->second->EndOfFile();
FileID id = it->second->GetID();
DBG_LOG(DBG_FILE_ANALYSIS, "Remove FileID %s", id.c_str());
if ( ! id_map.erase(id) )
reporter->Error("No mapping for fileID %s", id.c_str());
ignored.erase(unique);
delete it->second;
str_map.erase(unique);
return true;
}
bool Manager::IsIgnored(const string& unique)
{
return ignored.find(unique) != ignored.end();
}
void Manager::GetFileHandle(analyzer::Tag tag, Connection* c, bool is_orig)
{
current_handle.clear();
if ( ! get_file_handle )
return;
EnumVal* tagval = tag.AsEnumVal();
Ref(tagval);
val_list* vl = new val_list();
vl->append(tagval);
vl->append(c->BuildConnVal());
vl->append(new Val(is_orig, TYPE_BOOL));
mgr.QueueEvent(get_file_handle, vl);
mgr.Drain(); // need file handle immediately so we don't have to buffer data
}
bool Manager::IsDisabled(analyzer::Tag tag)
{
if ( ! disabled )
disabled = internal_const_val("FileAnalysis::disable")->AsTableVal();
Val* index = new Val(tag, TYPE_COUNT);
Val* yield = disabled->Lookup(index);
Unref(index);
if ( ! yield )
return false;
bool rval = yield->AsBool();
Unref(yield);
return rval;
}

src/file_analysis/Manager.h (new file, 183 lines)
@@ -0,0 +1,183 @@
// See the file "COPYING" in the main distribution directory for copyright.
#ifndef FILE_ANALYSIS_MANAGER_H
#define FILE_ANALYSIS_MANAGER_H
#include <string>
#include <map>
#include <set>
#include <queue>
#include "Net.h"
#include "Conn.h"
#include "Val.h"
#include "Analyzer.h"
#include "Timer.h"
#include "EventHandler.h"
#include "File.h"
#include "FileTimer.h"
#include "FileID.h"
#include "analyzer/Tag.h"
namespace file_analysis {
/**
* Main entry point for interacting with file analysis.
*/
class Manager {
public:
Manager();
~Manager();
/**
* Times out any active file analysis to prepare for shutdown.
*/
void Terminate();
/**
* Take in a unique file handle string to identify incoming file data.
*/
void SetHandle(const string& handle);
/**
* Pass in non-sequential file data.
*/
void DataIn(const u_char* data, uint64 len, uint64 offset,
analyzer::Tag tag, Connection* conn, bool is_orig);
void DataIn(const u_char* data, uint64 len, uint64 offset,
const string& unique);
void DataIn(const u_char* data, uint64 len, uint64 offset,
File* file);
/**
* Pass in sequential file data.
*/
void DataIn(const u_char* data, uint64 len, analyzer::Tag tag,
Connection* conn, bool is_orig);
void DataIn(const u_char* data, uint64 len, const string& unique);
void DataIn(const u_char* data, uint64 len, File* file);
/**
* Signal the end of file data.
*/
void EndOfFile(analyzer::Tag tag, Connection* conn);
void EndOfFile(analyzer::Tag tag, Connection* conn, bool is_orig);
void EndOfFile(const string& unique);
/**
* Signal a gap in the file data stream.
*/
void Gap(uint64 offset, uint64 len, analyzer::Tag tag, Connection* conn,
bool is_orig);
void Gap(uint64 offset, uint64 len, const string& unique);
void Gap(uint64 offset, uint64 len, File* file);
/**
* Provide the expected number of bytes that comprise a file.
*/
void SetSize(uint64 size, analyzer::Tag tag, Connection* conn,
bool is_orig);
void SetSize(uint64 size, const string& unique);
void SetSize(uint64 size, File* file);
/**
* Starts ignoring a file, which will eventually be removed from internal
* mappings on EOF or timeout.
* @return false if file identifier did not map to anything, else true.
*/
bool IgnoreFile(const FileID& file_id);
/**
* If called during a \c file_timeout event handler, requests deferral of
* analysis timeout.
*/
bool PostponeTimeout(const FileID& file_id) const;
/**
* Sets an inactivity threshold for the file.
*/
bool SetTimeoutInterval(const FileID& file_id, double interval) const;
/**
* Queue attachment of an analyzer to the file identifier. Multiple
* analyzers of a given type can be attached per file identifier at a time
* as long as the arguments differ.
* @return false if the analyzer failed to be instantiated, else true.
*/
bool AddAnalyzer(const FileID& file_id, RecordVal* args) const;
/**
* Queue removal of an analyzer for a given file identifier.
* @return true if the analyzer is active at the time of call, else false.
*/
bool RemoveAnalyzer(const FileID& file_id, const RecordVal* args) const;
/**
* @return whether the file mapped to \a unique is being ignored.
*/
bool IsIgnored(const string& unique);
protected:
friend class FileTimer;
typedef map<string, File*> StrMap;
typedef set<string> StrSet;
typedef map<FileID, File*> IDMap;
/**
* @return the File object mapped to \a unique or a null pointer if analysis
* is being ignored for the associated file. A File object may be
* created if a mapping doesn't exist, and if it did exist, the
* activity time is refreshed along with any connection-related
* fields.
*/
File* GetFile(const string& unique, Connection* conn = 0,
analyzer::Tag tag = AnalyzerTag::Error,
bool is_orig = false, bool update_conn = true);
/**
* @return the File object mapped to \a file_id, or a null pointer if no
* mapping exists.
*/
File* Lookup(const FileID& file_id) const;
/**
* Evaluate timeout policy for a file and remove the File object mapped to
* \a file_id if needed.
*/
void Timeout(const FileID& file_id, bool is_terminating = ::terminating);
/**
* Immediately remove file_analysis::File object associated with \a unique.
* @return false if file string did not map to anything, else true.
*/
bool RemoveFile(const string& unique);
/**
* Sets #current_handle to a unique file handle string based on what the
* \c get_file_handle event derives from the connection params. The
* event queue is flushed so that we can get the handle value immediately.
*/
void GetFileHandle(analyzer::Tag tag, Connection* c, bool is_orig);
/**
* @return whether file analysis is disabled for the given analyzer.
*/
static bool IsDisabled(analyzer::Tag tag);
private:
StrMap str_map; /**< Map unique string to file_analysis::File. */
IDMap id_map; /**< Map file ID to file_analysis::File records. */
StrSet ignored; /**< Ignored files. Will be finally removed on EOF. */
string current_handle; /**< Last file handle set by get_file_handle event.*/
static TableVal* disabled; /**< Table of disabled analyzers. */
};
} // namespace file_analysis
extern file_analysis::Manager* file_mgr;
#endif
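
For orientation, a rough sketch of how a protocol analyzer is expected to drive this interface when it sees a file body; the wrapper function names and values are placeholders, but the file_mgr calls correspond to the declarations above, and the manager resolves the file handle internally via the get_file_handle event.

// Sketch of the expected calling pattern from a protocol analyzer; the
// function names and values are placeholders.
void deliver_body_chunk(analyzer::Tag tag, Connection* conn, bool is_orig,
                        const u_char* data, uint64 len, uint64 total_len)
	{
	file_mgr->SetSize(total_len, tag, conn, is_orig);	// expected size, if known
	file_mgr->DataIn(data, len, tag, conn, is_orig);	// sequential delivery
	}

void body_done(analyzer::Tag tag, Connection* conn, bool is_orig)
	{
	file_mgr->EndOfFile(tag, conn, is_orig);	// triggers EOF handling and cleanup
	}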