Prettify file analysis IDs to be more like connection uids.

This commit is contained in:
Jon Siwek 2013-02-20 22:09:39 -06:00
parent f8af42cf9a
commit ceb471fb36
10 changed files with 136 additions and 77 deletions

View file

@ -451,6 +451,7 @@ set(bro_SRCS
file_analysis/Manager.cc
file_analysis/Info.cc
file_analysis/InfoTimer.cc
file_analysis/FileID.h
file_analysis/Action.h
file_analysis/Extract.cc

View file

@ -21,7 +21,7 @@ File_Analyzer::File_Analyzer(Connection* conn)
char op[256], rp[256];
modp_ulitoa10(ntohs(conn->OrigPort()), op);
modp_ulitoa10(ntohs(conn->RespPort()), rp);
file_id = "TCPFile " + conn->OrigAddr().AsString() + ":" + op + "->" +
unique_file = "TCPFile " + conn->OrigAddr().AsString() + ":" + op + "->" +
conn->RespAddr().AsString() + ":" + rp;
}
@ -29,7 +29,7 @@ void File_Analyzer::DeliverStream(int len, const u_char* data, bool orig)
{
TCP_ApplicationAnalyzer::DeliverStream(len, data, orig);
file_mgr->DataIn(file_id, data, len, Conn());
file_mgr->DataIn(unique_file, data, len, Conn());
int n = min(len, BUFFER_SIZE - buffer_len);
@ -48,14 +48,14 @@ void File_Analyzer::Undelivered(int seq, int len, bool orig)
{
TCP_ApplicationAnalyzer::Undelivered(seq, len, orig);
file_mgr->Gap(file_id, seq, len);
file_mgr->Gap(unique_file, seq, len);
}
void File_Analyzer::Done()
{
TCP_ApplicationAnalyzer::Done();
file_mgr->EndOfFile(file_id, Conn());
file_mgr->EndOfFile(unique_file, Conn());
if ( buffer_len && buffer_len != BUFFER_SIZE )
Identify();

View file

@ -37,7 +37,7 @@ protected:
static magic_t magic;
static magic_t magic_mime;
string file_id;
string unique_file;
};
#endif

View file

@ -61,7 +61,8 @@ enum Action %{
function FileAnalysis::postpone_timeout%(file_id: string%): bool
%{
bool result = file_mgr->PostponeTimeout(file_id->CheckString());
using namespace file_analysis;
bool result = file_mgr->PostponeTimeout(FileID(file_id->CheckString()));
return new Val(result, TYPE_BOOL);
%}
@ -69,9 +70,10 @@ function FileAnalysis::add_action%(file_id: string,
action: FileAnalysis::Action,
args: any%): bool
%{
using namespace file_analysis;
RecordVal* rv = args->AsRecordVal()->CoerceTo(
BifType::Record::FileAnalysis::ActionArgs);
bool result = file_mgr->AddAction(file_id->CheckString(),
bool result = file_mgr->AddAction(FileID(file_id->CheckString()),
action->AsEnumVal(), rv);
Unref(rv);
return new Val(result, TYPE_BOOL);
@ -80,13 +82,15 @@ function FileAnalysis::add_action%(file_id: string,
function FileAnalysis::remove_action%(file_id: string,
action: FileAnalysis::Action%): bool
%{
bool result = file_mgr->RemoveAction(file_id->CheckString(),
using namespace file_analysis;
bool result = file_mgr->RemoveAction(FileID(file_id->CheckString()),
action->AsEnumVal());
return new Val(result, TYPE_BOOL);
%}
function FileAnalysis::stop%(file_id: string%): bool
%{
bool result = file_mgr->RemoveFile(file_id->CheckString());
using namespace file_analysis;
bool result = file_mgr->RemoveFile(FileID(file_id->CheckString()));
return new Val(result, TYPE_BOOL);
%}

View file

@ -0,0 +1,32 @@
#ifndef FILE_ANALYSIS_FILEID_H
#define FILE_ANALYSIS_FILEID_H
namespace file_analysis {
/**
* A simple string wrapper class to help enforce some type safety between
* methods of FileAnalysis::Manager, some of which use a unique string to
* identify files, and others which use a pretty hash (the FileID) to identify
* files. A FileID is primarily used in methods which interface with the
* script-layer, while the unique strings are used for methods which interface
* with protocol analyzers (to better accommodate the possibility that a file
* can be distributed over different connections and thus analyzer instances).
*/
struct FileID {
	/** The identifier text wrapped by this object. */
	std::string id;

	/**
	 * Wraps a copy of \a arg_id. The constructor is explicit so that a plain
	 * string is never silently converted into a FileID.
	 * @param arg_id the identifier text; taken by const reference so the only
	 *        copy made is the one into #id.
	 */
	explicit FileID(const std::string& arg_id) : id(arg_id) {}

	// Copy construction and copy assignment are deliberately left to the
	// compiler: a memberwise copy of #id is exactly the required behavior.

	/** @return the contents of #id as a NUL-terminated C string. */
	const char* c_str() const { return id.c_str(); }

	/** @return true if both objects wrap identical identifier text. */
	bool operator==(const FileID& rhs) const { return id == rhs.id; }

	/** @return true if the wrapped identifier texts differ. */
	bool operator!=(const FileID& rhs) const { return ! (id == rhs.id); }

	/**
	 * Orders by std::string comparison of #id, which allows a FileID to be
	 * used as the key of an ordered container.
	 */
	bool operator<(const FileID& rhs) const { return id < rhs.id; }

	/**
	 * Replaces the wrapped identifier text with \a rhs.
	 * @return a reference to this object.
	 */
	FileID& operator=(const std::string& rhs) { id = rhs; return *this; }
};
} // namespace file_analysis
#endif

View file

@ -2,6 +2,7 @@
#include "Info.h"
#include "InfoTimer.h"
#include "FileID.h"
#include "Reporter.h"
#include "Val.h"
@ -70,17 +71,20 @@ void Info::InitFieldIndices()
action_args_idx = Idx("action_args");
}
Info::Info(const string& file_id, Connection* conn, const string& protocol)
: val(0), last_activity_time(network_time), postpone_timeout(false),
need_reassembly(false)
Info::Info(const string& unique, Connection* conn, const string& protocol)
: file_id(unique), unique(unique), val(0), last_activity_time(network_time),
postpone_timeout(false), need_reassembly(false)
{
DBG_LOG(DBG_FILE_ANALYSIS, "Creating new Info object %s", file_id.c_str());
InitFieldIndices();
char id[20];
uitoa_n(calculate_unique_id(), id, sizeof(id), 62);
DBG_LOG(DBG_FILE_ANALYSIS, "Creating new Info object %s", id);
val = new RecordVal(BifType::Record::FileAnalysis::Info);
// TODO: hash/prettify file_id for script layer presentation
val->Assign(file_id_idx, new StringVal(file_id.c_str()));
val->Assign(file_id_idx, new StringVal(id));
file_id = FileID(id);
UpdateConnectionFields(conn);
@ -96,7 +100,7 @@ Info::~Info()
for ( it = actions.begin(); it != actions.end(); ++it )
delete it->second;
DBG_LOG(DBG_FILE_ANALYSIS, "Destroying Info object %s", FileID().c_str());
DBG_LOG(DBG_FILE_ANALYSIS, "Destroying Info object %s",file_id.c_str());
Unref(val);
}
@ -145,11 +149,6 @@ double Info::TimeoutInterval() const
return LookupFieldDefaultInterval(timeout_interval_idx);
}
string Info::FileID() const
{
return val->Lookup(file_id_idx)->AsString()->CheckString();
}
void Info::IncrementByteCount(uint64 size, int field_idx)
{
uint64 old = LookupFieldDefaultCount(field_idx);
@ -172,7 +171,7 @@ bool Info::IsComplete() const
void Info::ScheduleInactivityTimer() const
{
timer_mgr->Add(new InfoTimer(network_time, FileID(), TimeoutInterval()));
timer_mgr->Add(new InfoTimer(network_time, file_id, TimeoutInterval()));
}
bool Info::AddAction(EnumVal* act, RecordVal* args)
@ -184,7 +183,7 @@ bool Info::AddAction(EnumVal* act, RecordVal* args)
if ( ! a ) return false;
DBG_LOG(DBG_FILE_ANALYSIS, "Add action %d for file id %s", act->AsEnum(),
FileID().c_str());
file_id.c_str());
actions[act->AsEnum()] = a;
VectorVal* av = val->LookupWithDefault(actions_idx)->AsVectorVal();
@ -206,7 +205,7 @@ bool Info::RemoveAction(EnumVal* act)
if ( it == actions.end() ) return false;
DBG_LOG(DBG_FILE_ANALYSIS, "Remove action %d for file id %s", act->AsEnum(),
FileID().c_str());
file_id.c_str());
delete it->second;
actions.erase(it);
return true;

View file

@ -7,6 +7,7 @@
#include "Conn.h"
#include "Val.h"
#include "Action.h"
#include "FileID.h"
namespace file_analysis {
@ -26,7 +27,12 @@ public:
/**
* @return value of the "file_id" field from #val record.
*/
string FileID() const;
FileID GetFileID() const { return file_id; }
/**
* @return the string which uniquely identifies the file.
*/
string Unique() const { return unique; }
/**
* @return #last_activity_time
@ -96,7 +102,7 @@ protected:
/**
* Constructor; only file_analysis::Manager should be creating these.
*/
Info(const string& file_id, Connection* conn = 0,
Info(const string& unique, Connection* conn = 0,
const string& protocol = "");
/**
@ -122,6 +128,8 @@ protected:
*/
double LookupFieldDefaultInterval(int idx) const;
FileID file_id; /**< A pretty hash that likely identifies file*/
string unique; /**< A string that uniquely identifies file */
RecordVal* val; /**< \c FileAnalysis::Info from script layer. */
double last_activity_time; /**< Time of last activity. */
bool postpone_timeout; /**< Whether postponing timeout is requested. */

View file

@ -1,8 +1,9 @@
#ifndef FILE_ANALYSIS_INFOTIMER_H
#define FILE_ANALYSIS_INFOTIMER_H
#include "Timer.h"
#include <string>
#include "Timer.h"
#include "FileID.h"
namespace file_analysis {
@ -12,7 +13,7 @@ namespace file_analysis {
class InfoTimer : public Timer {
public:
InfoTimer(double t, const string& id, double interval)
InfoTimer(double t, const FileID& id, double interval)
: Timer(t + interval, TIMER_FILE_ANALYSIS_INACTIVITY), file_id(id) {}
~InfoTimer() {}
@ -25,7 +26,7 @@ public:
protected:
string file_id;
FileID file_id;
};
} // namespace file_analysis

View file

@ -17,8 +17,8 @@ Manager::~Manager()
void Manager::Terminate()
{
vector<string> keys;
for ( FileMap::iterator it = file_map.begin(); it != file_map.end(); ++it )
vector<FileID> keys;
for ( IDMap::iterator it = id_map.begin(); it != id_map.end(); ++it )
keys.push_back(it->first);
for ( size_t i = 0; i < keys.size(); ++i )
Timeout(keys[i], true);
@ -29,46 +29,46 @@ static void check_file_done(Info* info)
if ( info->IsComplete() )
{
Manager::EvaluatePolicy(BifEnum::FileAnalysis::TRIGGER_DONE, info);
file_mgr->RemoveFile(info->FileID());
file_mgr->RemoveFile(info->GetFileID());
}
}
void Manager::DataIn(const string& file_id, const u_char* data, uint64 len,
void Manager::DataIn(const string& unique, const u_char* data, uint64 len,
uint64 offset, Connection* conn, const string& protocol)
{
Info* info = IDtoInfo(file_id, conn, protocol);
Info* info = GetInfo(unique, conn, protocol);
info->DataIn(data, len, offset);
check_file_done(info);
}
void Manager::DataIn(const string& file_id, const u_char* data, uint64 len,
void Manager::DataIn(const string& unique, const u_char* data, uint64 len,
Connection* conn, const string& protocol)
{
Info* info = IDtoInfo(file_id, conn, protocol);
Info* info = GetInfo(unique, conn, protocol);
info->DataIn(data, len);
check_file_done(info);
}
void Manager::EndOfFile(const string& file_id, Connection* conn,
void Manager::EndOfFile(const string& unique, Connection* conn,
const string& protocol)
{
Info* info = IDtoInfo(file_id, conn, protocol);
Info* info = GetInfo(unique, conn, protocol);
info->EndOfFile();
Manager::EvaluatePolicy(BifEnum::FileAnalysis::TRIGGER_EOF, info);
}
void Manager::Gap(const string& file_id, uint64 offset, uint64 len,
void Manager::Gap(const string& unique, uint64 offset, uint64 len,
Connection* conn, const string& protocol)
{
Info* info = IDtoInfo(file_id, conn, protocol);
Info* info = GetInfo(unique, conn, protocol);
info->Gap(offset, len);
Manager::EvaluatePolicy(BifEnum::FileAnalysis::TRIGGER_GAP, info);
}
void Manager::SetSize(const string& file_id, uint64 size,
void Manager::SetSize(const string& unique, uint64 size,
Connection* conn, const string& protocol)
{
Info* info = IDtoInfo(file_id, conn, protocol);
Info* info = GetInfo(unique, conn, protocol);
info->SetTotalBytes(size);
check_file_done(info);
}
@ -89,7 +89,7 @@ void Manager::EvaluatePolicy(BifEnum::FileAnalysis::Trigger t, Info* info)
Unref(result);
}
bool Manager::PostponeTimeout(const string& file_id) const
bool Manager::PostponeTimeout(const FileID& file_id) const
{
Info* info = Lookup(file_id);
@ -99,7 +99,7 @@ bool Manager::PostponeTimeout(const string& file_id) const
return true;
}
bool Manager::AddAction(const string& file_id, EnumVal* act,
bool Manager::AddAction(const FileID& file_id, EnumVal* act,
RecordVal* args) const
{
Info* info = Lookup(file_id);
@ -109,7 +109,7 @@ bool Manager::AddAction(const string& file_id, EnumVal* act,
return info->AddAction(act, args);
}
bool Manager::RemoveAction(const string& file_id, EnumVal* act) const
bool Manager::RemoveAction(const FileID& file_id, EnumVal* act) const
{
Info* info = Lookup(file_id);
@ -118,14 +118,23 @@ bool Manager::RemoveAction(const string& file_id, EnumVal* act) const
return info->RemoveAction(act);
}
Info* Manager::IDtoInfo(const string& file_id, Connection* conn,
Info* Manager::GetInfo(const string& unique, Connection* conn,
const string& protocol)
{
Info* rval = file_map[file_id];
Info* rval = str_map[unique];
if ( ! rval )
{
rval = file_map[file_id] = new Info(file_id, conn, protocol);
rval = str_map[unique] = new Info(unique, conn, protocol);
FileID id = rval->GetFileID();
if ( id_map[id] )
{
reporter->Error("Evicted duplicate file ID: %s", id.c_str());
RemoveFile(id);
}
id_map[id] = rval;
Manager::EvaluatePolicy(BifEnum::FileAnalysis::TRIGGER_NEW, rval);
}
else
@ -137,16 +146,16 @@ Info* Manager::IDtoInfo(const string& file_id, Connection* conn,
return rval;
}
Info* Manager::Lookup(const string& file_id) const
Info* Manager::Lookup(const FileID& file_id) const
{
FileMap::const_iterator it = file_map.find(file_id);
IDMap::const_iterator it = id_map.find(file_id);
if ( it == file_map.end() ) return 0;
if ( it == id_map.end() ) return 0;
return it->second;
}
void Manager::Timeout(const string& file_id, bool is_terminating)
void Manager::Timeout(const FileID& file_id, bool is_terminating)
{
Info* info = Lookup(file_id);
@ -157,25 +166,27 @@ void Manager::Timeout(const string& file_id, bool is_terminating)
if ( info->postpone_timeout && ! is_terminating )
{
DBG_LOG(DBG_FILE_ANALYSIS, "Postpone file analysis timeout for %s",
info->FileID().c_str());
info->GetFileID().c_str());
info->UpdateLastActivityTime();
info->ScheduleInactivityTimer();
return;
}
DBG_LOG(DBG_FILE_ANALYSIS, "File analysis timeout for %s",
info->FileID().c_str());
info->GetFileID().c_str());
RemoveFile(file_id);
}
bool Manager::RemoveFile(const string& file_id)
bool Manager::RemoveFile(const FileID& file_id)
{
FileMap::iterator it = file_map.find(file_id);
IDMap::iterator it = id_map.find(file_id);
if ( it == file_map.end() ) return false;
if ( it == id_map.end() ) return false;
if ( ! str_map.erase(it->second->Unique()) )
reporter->Error("No string mapping for file ID %s", file_id.c_str());
delete it->second;
file_map.erase(it);
id_map.erase(it);
return true;
}

View file

@ -10,6 +10,7 @@
#include "Info.h"
#include "InfoTimer.h"
#include "FileID.h"
namespace file_analysis {
@ -31,58 +32,58 @@ public:
/**
* Pass in non-sequential file data.
*/
void DataIn(const string& file_id, const u_char* data, uint64 len,
void DataIn(const string& unique, const u_char* data, uint64 len,
uint64 offset, Connection* conn = 0,
const string& protocol = "");
/**
* Pass in sequential file data.
*/
void DataIn(const string& file_id, const u_char* data, uint64 len,
void DataIn(const string& unique, const u_char* data, uint64 len,
Connection* conn = 0, const string& protocol = "");
/**
* Signal the end of file data.
*/
void EndOfFile(const string& file_id, Connection* conn = 0,
void EndOfFile(const string& unique, Connection* conn = 0,
const string& protocol = "");
/**
* Signal a gap in the file data stream.
*/
void Gap(const string& file_id, uint64 offset, uint64 len,
void Gap(const string& unique, uint64 offset, uint64 len,
Connection* conn = 0, const string& protocol = "");
/**
* Provide the expected number of bytes that comprise a file.
*/
void SetSize(const string& file_id, uint64 size, Connection* conn = 0,
void SetSize(const string& unique, uint64 size, Connection* conn = 0,
const string& protocol = "");
/**
* Discard the file_analysis::Info object associated with \a file_id.
* @return false if file identifier did not map to anything, else true.
*/
bool RemoveFile(const string& file_id);
bool RemoveFile(const FileID& file_id);
/**
* If called during \c FileAnalysis::policy evaluation for a
* \c FileAnalysis::TRIGGER_TIMEOUT, requests deferral of analysis timeout.
*/
bool PostponeTimeout(const string& file_id) const;
bool PostponeTimeout(const FileID& file_id) const;
/**
* Attaches an action to the file identifier. Only one action of a given
* type can be attached per file identifier at a time.
* @return true if the action was attached, else false.
*/
bool AddAction(const string& file_id, EnumVal* act, RecordVal* args) const;
bool AddAction(const FileID& file_id, EnumVal* act, RecordVal* args) const;
/**
* Removes an action for a given file identifier.
* @return true if the action was removed, else false.
*/
bool RemoveAction(const string& file_id, EnumVal* act) const;
bool RemoveAction(const FileID& file_id, EnumVal* act) const;
/**
* Calls the \c FileAnalysis::policy hook.
@ -93,29 +94,31 @@ protected:
friend class InfoTimer;
typedef map<string, Info*> FileMap;
typedef map<string, Info*> StrMap;
typedef map<FileID, Info*> IDMap;
/**
* @return the Info object mapped to \a file_id. One is created if mapping
* @return the Info object mapped to \a unique. One is created if mapping
* doesn't exist. If it did exist, the activity time is refreshed
* and connection-related fields of the record value may be updated.
*/
Info* IDtoInfo(const string& file_id, Connection* conn = 0,
Info* GetInfo(const string& unique, Connection* conn = 0,
const string& protocol = "");
/**
* @return the Info object mapped to \a file_id, or a null pointer if no
* mapping exists.
*/
Info* Lookup(const string& file_id) const;
Info* Lookup(const FileID& file_id) const;
/**
* Evaluate timeout policy for a file and remove the Info object mapped to
* \a file_id if needed.
*/
void Timeout(const string& file_id, bool is_terminating = ::terminating);
void Timeout(const FileID& file_id, bool is_terminating = ::terminating);
FileMap file_map; /**< Map strings to \c FileAnalysis::Info records. */
StrMap str_map; /**< Map unique strings to \c FileAnalysis::Info records. */
IDMap id_map; /**< Map file IDs to \c FileAnalysis::Info records. */
};
} // namespace file_analysis