FileAnalysis: buffer input that can't get unique file handle immediately

A retry happens on every new input and also periodically based on a
timer.  If a file handle is returned at those times, the input is
forwarded for analysis, else it keeps retrying until a timeout
threshold.
This commit is contained in:
Jon Siwek 2013-03-14 10:57:16 -05:00
parent 878dfff2f2
commit 637fe69cf9
11 changed files with 319 additions and 103 deletions

View file

@ -3025,6 +3025,26 @@ export {
}
module GLOBAL;
module FileAnalysis;
export {
## When the file analysis framework receives input regarding a file
## transferred over the network, and a unique handle string cannot
## be determined immediately from :bro:see:`FileAnalysis::handle_callbacks`,
## that input is buffered. This is the interval at which to automatically
## check back on any currently buffered inputs to see if a handle is
## available so that the input can be processed. Since any input
## triggers the check for all buffered inputs, this option only helps
## cases where the file analysis framework is getting little input.
const pending_file_drain_interval = 10 sec &redef;
## This is the interval at which to give up checking for a unique handle
## string for files transferred over the network that were initially
## buffered because no handle was available yet (e.g. when the necessary
## events to construct the handle may not have been flushed yet).
const pending_file_timeout = 10 sec &redef;
}
module GLOBAL;
## Number of bytes per packet to capture from live interfaces.
const snaplen = 8192 &redef;

View file

@ -12,6 +12,7 @@ function get_file_handle(c: connection, is_orig: bool): string
if ( c$http$range_request )
return fmt("%s http(%s): %s: %s", c$start_time, is_orig,
c$id$orig_h, build_url(c$http));
return fmt("%s http(%s, %s): %s", c$start_time, is_orig,
c$http$trans_depth, id_string(c$id));
}

View file

@ -30,7 +30,6 @@
#include "PacketSort.h"
#include "Serializer.h"
#include "PacketDumper.h"
#include "file_analysis/Manager.h"
extern "C" {
#include "setsignal.h"
@ -353,7 +352,6 @@ void net_packet_dispatch(double t, const struct pcap_pkthdr* hdr,
sessions->DispatchPacket(t, hdr, pkt, hdr_size, src_ps, pkt_elem);
mgr.Drain();
file_mgr->DrainPending();
if ( sp )
{

View file

@ -192,7 +192,6 @@
#include "logging/Manager.h"
#include "IPAddr.h"
#include "bro_inet_ntop.h"
#include "file_analysis/Manager.h"
extern "C" {
#include "setsignal.h"
@ -1463,7 +1462,6 @@ void RemoteSerializer::Process()
current_iosrc = this;
sessions->NextPacket(p->time, p->hdr, p->pkt, p->hdr_size, 0);
mgr.Drain();
file_mgr->DrainPending();
current_hdr = 0; // done with these
current_pkt = 0;

View file

@ -23,6 +23,7 @@ enum TimerType {
TIMER_CONN_STATUS_UPDATE,
TIMER_DNS_EXPIRE,
TIMER_FILE_ANALYSIS_INACTIVITY,
TIMER_FILE_ANALYSIS_DRAIN,
TIMER_FRAG,
TIMER_INCREMENTAL_SEND,
TIMER_INCREMENTAL_WRITE,

View file

@ -23,3 +23,6 @@ const Tunnel::delay_gtp_confirmation: bool;
const Tunnel::ip_tunnel_timeout: interval;
const Threading::heartbeat_interval: interval;
const FileAnalysis::pending_file_drain_interval: interval;
const FileAnalysis::pending_file_timeout: interval;

View file

@ -16,8 +16,6 @@ public:
InfoTimer(double t, const FileID& id, double interval)
: Timer(t + interval, TIMER_FILE_ANALYSIS_INACTIVITY), file_id(id) {}
~InfoTimer() {}
/**
* Check inactivity of file_analysis::Info corresponding to #file_id,
* reschedule if active, else call file_analysis::Manager::Timeout.

View file

@ -8,8 +8,19 @@
using namespace file_analysis;
Manager::Manager()
void DrainTimer::Dispatch(double t, int is_expire)
{
using BifConst::FileAnalysis::pending_file_drain_interval;
DBG_LOG(DBG_FILE_ANALYSIS, "DrainTimer dispatched");
file_mgr->DrainPending();
if ( ! is_expire )
timer_mgr->Add(new DrainTimer(pending_file_drain_interval));
}
Manager::Manager() : is_draining(false)
{
using BifConst::FileAnalysis::pending_file_drain_interval;
timer_mgr->Add(new DrainTimer(pending_file_drain_interval));
}
Manager::~Manager()
@ -38,11 +49,14 @@ string Manager::GetFileHandle(Analyzer* root, Connection* conn,
vl.append(new Val(is_orig, TYPE_BOOL));
Val* result = callback->AsFunc()->Call(&vl);
if ( result )
{
string rval = result->AsString()->CheckString();
Unref(result);
if ( ! rval.empty() ) return rval;
}
}
for ( analyzer_list::const_iterator it = root->GetChildren().begin();
it != root->GetChildren().end(); ++it )
@ -63,14 +77,29 @@ string Manager::GetFileHandle(Connection* conn, bool is_orig) const
void Manager::DrainPending()
{
for ( size_t i = 0; i < pending.size(); ++i )
pending[i].Retry();
if ( is_draining ) return;
pending.clear();
is_draining = true;
PendingList::iterator it = pending.begin();
while ( it != pending.end() )
{
if ( (*it)->Retry() || (*it)->IsStale() )
{
delete *it;
pending.erase(it++);
}
else
++it;
}
is_draining = false;
}
void Manager::Terminate()
{
DrainPending();
vector<FileID> keys;
for ( IDMap::iterator it = id_map.begin(); it != id_map.end(); ++it )
keys.push_back(it->first);
@ -78,19 +107,24 @@ void Manager::Terminate()
Timeout(keys[i], true);
}
void Manager::DataIn(const u_char* data, uint64 len, uint64 offset,
Connection* conn, bool is_orig, bool allow_retry)
bool Manager::DataIn(const u_char* data, uint64 len, uint64 offset,
Connection* conn, bool is_orig)
{
DrainPending();
string unique = GetFileHandle(conn, is_orig);
if ( ! unique.empty() )
{
DataIn(data, len, offset, GetInfo(unique, conn));
return;
return true;
}
if ( allow_retry )
pending.push_back(PendingFile(data, len, offset, conn, is_orig));
if ( ! is_draining )
pending.push_back(new PendingDataInChunk(data, len, offset, conn,
is_orig));
return false;
}
void Manager::DataIn(const u_char* data, uint64 len, uint64 offset,
@ -102,6 +136,8 @@ void Manager::DataIn(const u_char* data, uint64 len, uint64 offset,
void Manager::DataIn(const u_char* data, uint64 len, uint64 offset,
Info* info)
{
DrainPending();
if ( ! info ) return;
info->DataIn(data, len, offset);
@ -110,19 +146,23 @@ void Manager::DataIn(const u_char* data, uint64 len, uint64 offset,
RemoveFile(info->GetUnique());
}
void Manager::DataIn(const u_char* data, uint64 len, Connection* conn,
bool is_orig, bool allow_retry)
bool Manager::DataIn(const u_char* data, uint64 len, Connection* conn,
bool is_orig)
{
DrainPending();
string unique = GetFileHandle(conn, is_orig);
if ( ! unique.empty() )
{
DataIn(data, len, GetInfo(unique, conn));
return;
return true;
}
if ( allow_retry )
pending.push_back(PendingFile(data, len, conn, is_orig));
if ( ! is_draining )
pending.push_back(new PendingDataInStream(data, len, conn, is_orig));
return false;
}
void Manager::DataIn(const u_char* data, uint64 len, const string& unique)
@ -132,6 +172,8 @@ void Manager::DataIn(const u_char* data, uint64 len, const string& unique)
void Manager::DataIn(const u_char* data, uint64 len, Info* info)
{
DrainPending();
if ( ! info ) return;
info->DataIn(data, len);
@ -146,27 +188,46 @@ void Manager::EndOfFile(Connection* conn)
EndOfFile(conn, false);
}
void Manager::EndOfFile(Connection* conn, bool is_orig)
bool Manager::EndOfFile(Connection* conn, bool is_orig)
{
DrainPending();
string unique = GetFileHandle(conn, is_orig);
if ( unique.empty() ) return; // nothing to do
if ( ! unique.empty() )
{
RemoveFile(unique);
return true;
}
if ( ! is_draining )
pending.push_back(new PendingEOF(conn, is_orig));
return false;
}
void Manager::EndOfFile(const string& unique)
{
DrainPending();
RemoveFile(unique);
}
void Manager::Gap(uint64 offset, uint64 len, Connection* conn, bool is_orig)
bool Manager::Gap(uint64 offset, uint64 len, Connection* conn, bool is_orig)
{
DrainPending();
string unique = GetFileHandle(conn, is_orig);
if ( unique.empty() ) return; // nothing to do since no data has been seen
if ( ! unique.empty() )
{
Gap(offset, len, GetInfo(unique, conn));
return true;
}
if ( ! is_draining )
pending.push_back(new PendingGap(offset, len, conn, is_orig));
return false;
}
void Manager::Gap(uint64 offset, uint64 len, const string& unique)
@ -176,18 +237,29 @@ void Manager::Gap(uint64 offset, uint64 len, const string& unique)
void Manager::Gap(uint64 offset, uint64 len, Info* info)
{
DrainPending();
if ( ! info ) return;
info->Gap(offset, len);
}
void Manager::SetSize(uint64 size, Connection* conn, bool is_orig)
bool Manager::SetSize(uint64 size, Connection* conn, bool is_orig)
{
DrainPending();
string unique = GetFileHandle(conn, is_orig);
if ( unique.empty() ) return; // ok assuming this always follows a DataIn()
if ( ! unique.empty() )
{
SetSize(size, GetInfo(unique, conn));
return true;
}
if ( ! is_draining )
pending.push_back(new PendingSize(size, conn, is_orig));
return false;
}
void Manager::SetSize(uint64 size, const string& unique)
@ -197,6 +269,8 @@ void Manager::SetSize(uint64 size, const string& unique)
void Manager::SetSize(uint64 size, Info* info)
{
DrainPending();
if ( ! info ) return;
info->SetTotalBytes(size);
@ -294,6 +368,8 @@ Info* Manager::Lookup(const FileID& file_id) const
void Manager::Timeout(const FileID& file_id, bool is_terminating)
{
DrainPending();
Info* info = Lookup(file_id);
if ( ! info ) return;

View file

@ -4,12 +4,13 @@
#include <string>
#include <map>
#include <set>
#include <vector>
#include <list>
#include "Net.h"
#include "Conn.h"
#include "Val.h"
#include "Analyzer.h"
#include "Timer.h"
#include "Info.h"
#include "InfoTimer.h"
@ -18,6 +19,15 @@
namespace file_analysis {
class DrainTimer : public Timer {
public:
DrainTimer(double interval)
: Timer(network_time + interval, TIMER_FILE_ANALYSIS_DRAIN) {}
void Dispatch(double t, int is_expire);
};
/**
* Main entry point for interacting with file analysis.
*/
@ -28,17 +38,6 @@ public:
~Manager();
/**
* Attempts to forward the data from any pending file contents, i.e.
* those for which a unique file handle string could not immediately
* be determined. If again a file handle can't be determined, give up.
* The assumption for this to work correctly is that the EventMgr would
* have always drained between packet boundaries, so calling this method
* at that time may mean the script-layer function for generating file
* handles can now come up with a result.
*/
void DrainPending();
/**
* Times out any active file analysis to prepare for shutdown.
*/
@ -47,8 +46,8 @@ public:
/**
* Pass in non-sequential file data.
*/
void DataIn(const u_char* data, uint64 len, uint64 offset,
Connection* conn, bool is_orig, bool allow_retry = true);
bool DataIn(const u_char* data, uint64 len, uint64 offset,
Connection* conn, bool is_orig);
void DataIn(const u_char* data, uint64 len, uint64 offset,
const string& unique);
void DataIn(const u_char* data, uint64 len, uint64 offset,
@ -57,8 +56,7 @@ public:
/**
* Pass in sequential file data.
*/
void DataIn(const u_char* data, uint64 len, Connection* conn, bool is_orig,
bool allow_retry = true);
bool DataIn(const u_char* data, uint64 len, Connection* conn, bool is_orig);
void DataIn(const u_char* data, uint64 len, const string& unique);
void DataIn(const u_char* data, uint64 len, Info* info);
@ -66,20 +64,20 @@ public:
* Signal the end of file data.
*/
void EndOfFile(Connection* conn);
void EndOfFile(Connection* conn, bool is_orig);
bool EndOfFile(Connection* conn, bool is_orig);
void EndOfFile(const string& unique);
/**
* Signal a gap in the file data stream.
*/
void Gap(uint64 offset, uint64 len, Connection* conn, bool is_orig);
bool Gap(uint64 offset, uint64 len, Connection* conn, bool is_orig);
void Gap(uint64 offset, uint64 len, const string& unique);
void Gap(uint64 offset, uint64 len, Info* info);
/**
* Provide the expected number of bytes that comprise a file.
*/
void SetSize(uint64 size, Connection* conn, bool is_orig);
bool SetSize(uint64 size, Connection* conn, bool is_orig);
void SetSize(uint64 size, const string& unique);
void SetSize(uint64 size, Info* info);
@ -118,12 +116,13 @@ public:
protected:
friend class InfoTimer;
friend class DrainTimer;
friend class PendingFile;
typedef map<string, Info*> StrMap;
typedef set<string> StrSet;
typedef map<FileID, Info*> IDMap;
typedef vector<PendingFile> PendingList;
typedef list<PendingFile*> PendingList;
/**
* @return the Info object mapped to \a unique or a null pointer if analysis
@ -169,10 +168,19 @@ protected:
*/
bool IsIgnored(const string& unique);
/**
* Attempts to forward the data from any pending file contents, i.e.
* those for which a unique file handle string could not immediately
* be determined.
*/
void DrainPending();
StrMap str_map; /**< Map unique strings to \c FileAnalysis::Info records. */
IDMap id_map; /**< Map file IDs to \c FileAnalysis::Info records. */
StrSet ignored; /**< Ignored files. Will be finally removed on EOF. */
PendingList pending; /**< Files waiting for next Tick to return a handle */
PendingList pending; /**< Files awaiting a unique handle. */
bool is_draining;
};
} // namespace file_analysis

View file

@ -3,58 +3,112 @@
using namespace file_analysis;
PendingFile::PendingFile(const u_char* arg_data, uint64 arg_len,
uint64 arg_offset, Connection* arg_conn,
bool arg_is_orig)
: is_linear(false), data(arg_data), len(arg_len), offset(arg_offset),
conn(arg_conn), is_orig(arg_is_orig)
static void copy_data(const u_char** dst, const u_char* src, uint64 len)
{
Ref(conn);
u_char* tmp = new u_char[len];
memcpy(tmp, src, len);
*dst = tmp;
}
PendingFile::PendingFile(const u_char* arg_data, uint64 arg_len,
Connection* arg_conn, bool arg_is_orig)
: is_linear(true), data(arg_data), len(arg_len), offset(0),
conn(arg_conn), is_orig(arg_is_orig)
static string conn_str(Connection* c)
{
Ref(conn);
char op[256], rp[256];
modp_ulitoa10(ntohs(c->OrigPort()), op);
modp_ulitoa10(ntohs(c->RespPort()), rp);
string rval = c->OrigAddr().AsString() + ":" + op + "->" +
c->RespAddr().AsString() + ":" + rp;
return rval;
}
PendingFile::PendingFile(const PendingFile& other)
: is_linear(other.is_linear), data(other.data), len(other.len),
offset(other.offset), conn(other.conn), is_orig(other.is_orig)
PendingFile::PendingFile(Connection* arg_conn, bool arg_is_orig)
: conn(arg_conn), is_orig(arg_is_orig), creation_time(network_time)
{
Ref(conn);
}
PendingFile& PendingFile::operator=(const PendingFile& other)
{
// handle self-assign for correct reference counting
if ( this == &other ) return *this;
Unref(conn);
is_linear = other.is_linear;
data = other.data;
len = other.len;
offset = other.offset;
conn = other.conn;
is_orig = other.is_orig;
Ref(conn);
return *this;
DBG_LOG(DBG_FILE_ANALYSIS, "New pending file: %s", conn_str(conn).c_str());
}
PendingFile::~PendingFile()
{
Unref(conn);
DBG_LOG(DBG_FILE_ANALYSIS, "Delete pending file: %s",
conn_str(conn).c_str());
}
void PendingFile::Retry() const
bool PendingFile::IsStale() const
{
if ( is_linear )
file_mgr->DataIn(data, len, conn, is_orig, false);
else
file_mgr->DataIn(data, len, offset, conn, is_orig, false);
using BifConst::FileAnalysis::pending_file_timeout;
if ( creation_time + pending_file_timeout < network_time )
{
DBG_LOG(DBG_FILE_ANALYSIS, "Stale pending file: %s",
conn_str(conn).c_str());
return true;
}
return false;
}
PendingDataInChunk::PendingDataInChunk(const u_char* arg_data, uint64 arg_len,
uint64 arg_offset, Connection* arg_conn,
bool arg_is_orig)
: PendingFile(arg_conn, arg_is_orig), len(arg_len), offset(arg_offset)
{
copy_data(&data, arg_data, len);
}
bool PendingDataInChunk::Retry() const
{
return file_mgr->DataIn(data, len, offset, conn, is_orig);
}
PendingDataInChunk::~PendingDataInChunk()
{
delete [] data;
}
PendingDataInStream::PendingDataInStream(const u_char* arg_data, uint64 arg_len,
Connection* arg_conn, bool arg_is_orig)
: PendingFile(arg_conn, arg_is_orig), len(arg_len)
{
copy_data(&data, arg_data, len);
}
bool PendingDataInStream::Retry() const
{
return file_mgr->DataIn(data, len, conn, is_orig);
}
PendingDataInStream::~PendingDataInStream()
{
delete [] data;
}
PendingGap::PendingGap(uint64 arg_offset, uint64 arg_len, Connection* arg_conn,
bool arg_is_orig)
: PendingFile(arg_conn, arg_is_orig), offset(arg_offset), len(arg_len)
{
}
bool PendingGap::Retry() const
{
return file_mgr->Gap(offset, len, conn, is_orig);
}
PendingEOF::PendingEOF(Connection* arg_conn, bool arg_is_orig)
: PendingFile(arg_conn, arg_is_orig)
{
}
bool PendingEOF::Retry() const
{
return file_mgr->EndOfFile(conn, is_orig);
}
PendingSize::PendingSize(uint64 arg_size, Connection* arg_conn,
bool arg_is_orig)
: PendingFile(arg_conn, arg_is_orig), size(arg_size)
{
}
bool PendingSize::Retry() const
{
return file_mgr->SetSize(size, conn, is_orig);
}

View file

@ -8,28 +8,87 @@ namespace file_analysis {
class PendingFile {
public:
PendingFile(const u_char* arg_data, uint64 arg_len, uint64 arg_offset,
Connection* arg_conn, bool arg_is_orig);
virtual ~PendingFile();
PendingFile(const u_char* arg_data, uint64 arg_len,
Connection* arg_conn, bool arg_is_orig);
virtual bool Retry() const = 0;
PendingFile(const PendingFile& other);
bool IsStale() const;
PendingFile& operator=(const PendingFile& other);
protected:
~PendingFile();
PendingFile(Connection* arg_conn, bool arg_is_orig);
void Retry() const;
Connection* conn;
bool is_orig;
double creation_time;
};
private:
class PendingDataInChunk : public PendingFile {
public:
PendingDataInChunk(const u_char* arg_data, uint64 arg_len,
uint64 arg_offset, Connection* arg_conn,
bool arg_is_orig);
virtual ~PendingDataInChunk();
virtual bool Retry() const;
protected:
bool is_linear;
const u_char* data;
uint64 len;
uint64 offset;
Connection* conn;
bool is_orig;
};
class PendingDataInStream : public PendingFile {
public:
PendingDataInStream(const u_char* arg_data, uint64 arg_len,
Connection* arg_conn, bool arg_is_orig);
virtual ~PendingDataInStream();
virtual bool Retry() const;
protected:
const u_char* data;
uint64 len;
};
class PendingGap : public PendingFile {
public:
PendingGap(uint64 arg_offset, uint64 arg_len, Connection* arg_conn,
bool arg_is_orig);
virtual bool Retry() const;
protected:
uint64 offset;
uint64 len;
};
class PendingEOF : public PendingFile {
public:
PendingEOF(Connection* arg_conn, bool arg_is_orig);
virtual bool Retry() const;
};
class PendingSize : public PendingFile {
public:
PendingSize(uint64 arg_size, Connection* arg_conn, bool arg_is_orig);
virtual bool Retry() const;
protected:
uint64 size;
};
} // namespace file_analysis