FileAnalysis: replace script-layer http file analysis.

Other misc:

- Remove HTTP::MD5 notice.

- Add "last_active" field to FileAnalysis::Info record.

- Replace "conn_uids", "conn_ids" fields in FileAnalysis::Info record
  with just a "conns" field containing full connection records.

- The http-methods unit test is failing now, but I think it will be
  fixed once I change the file handle callback mechanism to use events
  instead.
This commit is contained in:
Jon Siwek 2013-03-22 16:14:06 -05:00
parent 7034785810
commit 71f0e2d276
61 changed files with 411 additions and 625 deletions

View file

@ -8,22 +8,15 @@
#include "Reporter.h"
#include "Val.h"
#include "Type.h"
#include "Analyzer.h"
using namespace file_analysis;
static TableVal* empty_conn_id_set()
static TableVal* empty_connection_table()
{
TypeList* set_index = new TypeList(conn_id);
set_index->Append(conn_id->Ref());
return new TableVal(new SetType(set_index, 0));
}
static StringVal* get_conn_uid_val(Connection* conn)
{
char tmp[20];
if ( ! conn->GetUID() )
conn->SetUID(calculate_unique_id());
return new StringVal(uitoa_n(conn->GetUID(), tmp, sizeof(tmp), 62));
TypeList* tbl_index = new TypeList(conn_id);
tbl_index->Append(conn_id->Ref());
return new TableVal(new TableType(tbl_index, connection_type->Ref()));
}
static RecordVal* get_conn_id_val(const Connection* conn)
@ -39,8 +32,8 @@ static RecordVal* get_conn_id_val(const Connection* conn)
int Info::file_id_idx = -1;
int Info::parent_file_id_idx = -1;
int Info::source_idx = -1;
int Info::conn_uids_idx = -1;
int Info::conn_ids_idx = -1;
int Info::conns_idx = -1;
int Info::last_active_idx = -1;
int Info::seen_bytes_idx = -1;
int Info::total_bytes_idx = -1;
int Info::missing_bytes_idx = -1;
@ -64,8 +57,8 @@ void Info::StaticInit()
file_id_idx = Idx("file_id");
parent_file_id_idx = Idx("parent_file_id");
source_idx = Idx("source");
conn_uids_idx = Idx("conn_uids");
conn_ids_idx = Idx("conn_ids");
conns_idx = Idx("conns");
last_active_idx = Idx("last_active");
seen_bytes_idx = Idx("seen_bytes");
total_bytes_idx = Idx("total_bytes");
missing_bytes_idx = Idx("missing_bytes");
@ -83,10 +76,9 @@ void Info::StaticInit()
salt = BifConst::FileAnalysis::salt->CheckString();
}
Info::Info(const string& unique, Connection* conn)
: file_id(unique), unique(unique), val(0), last_activity_time(network_time),
postpone_timeout(false), need_reassembly(false), done(false),
actions(this)
Info::Info(const string& unique, Connection* conn, AnalyzerTag::Tag tag)
: file_id(unique), unique(unique), val(0), postpone_timeout(false),
need_reassembly(false), done(false), actions(this)
{
StaticInit();
@ -106,29 +98,15 @@ Info::Info(const string& unique, Connection* conn)
if ( conn )
{
// update source and connection fields
RecordVal* cval = conn->BuildConnVal();
ListVal* services = cval->Lookup(5)->AsTableVal()->ConvertToPureList();
Unref(cval);
string source;
for ( int i = 0; i < services->Length(); ++i )
{
if ( i > 0 )
source += ", ";
source += services->Index(i)->AsStringVal()->CheckString();
}
Unref(services);
if ( ! source.empty() )
val->Assign(source_idx, new StringVal(source.c_str()));
// add source and connection fields
val->Assign(source_idx, new StringVal(Analyzer::GetTagName(tag)));
UpdateConnectionFields(conn);
}
else
// use the unique file handle as source
val->Assign(source_idx, new StringVal(unique.c_str()));
UpdateLastActivityTime();
}
Info::~Info()
@ -137,19 +115,28 @@ Info::~Info()
Unref(val);
}
// Refreshes the "last_active" field of the val record: stores a new
// time-typed Val holding the current network_time at last_active_idx.
void Info::UpdateLastActivityTime()
{
val->Assign(last_active_idx, new Val(network_time, TYPE_TIME));
}
// Returns the time currently stored in the "last_active" field of the
// val record.
double Info::GetLastActivityTime() const
{
// NOTE(review): the Lookup() result is dereferenced without a null check;
// presumably safe because the constructor calls UpdateLastActivityTime()
// before anything can read the field -- confirm.
return val->Lookup(last_active_idx)->AsTime();
}
void Info::UpdateConnectionFields(Connection* conn)
{
if ( ! conn ) return;
Val* conn_uids = val->Lookup(conn_uids_idx);
Val* conn_ids = val->Lookup(conn_ids_idx);
if ( ! conn_uids )
val->Assign(conn_uids_idx, conn_uids = new TableVal(string_set));
if ( ! conn_ids )
val->Assign(conn_ids_idx, conn_ids = empty_conn_id_set());
Val* conns = val->Lookup(conns_idx);
conn_uids->AsTableVal()->Assign(get_conn_uid_val(conn), 0);
conn_ids->AsTableVal()->Assign(get_conn_id_val(conn), 0);
if ( ! conns )
val->Assign(conns_idx, conns = empty_connection_table());
Val* idx = get_conn_id_val(conn);
conns->AsTableVal()->Assign(idx, conn->BuildConnVal());
Unref(idx);
}
uint64 Info::LookupFieldDefaultCount(int idx) const

View file

@ -5,6 +5,7 @@
#include <vector>
#include <magic.h>
#include "AnalyzerTags.h"
#include "Conn.h"
#include "Val.h"
#include "ActionSet.h"
@ -49,14 +50,14 @@ public:
string GetUnique() const { return unique; }
/**
* @return #last_activity_time
* @return value of "last_active" field in #val record;
*/
double GetLastActivityTime() const { return last_activity_time; }
double GetLastActivityTime() const;
/**
* Refreshes #last_activity_time with current network time.
* Refreshes "last_active" field of #val record with current network time.
*/
void UpdateLastActivityTime() { last_activity_time = network_time; }
void UpdateLastActivityTime();
/**
* Set "total_bytes" field of #val record to \a size.
@ -73,7 +74,7 @@ public:
/**
* Create a timer to be dispatched after the amount of time indicated by
* the "timeout_interval" field of the #val record in order to check if
* #last_activity_time is old enough to timeout analysis of the file.
* "last_active" field is old enough to timeout analysis of the file.
*/
void ScheduleInactivityTimer() const;
@ -117,7 +118,8 @@ protected:
/**
* Constructor; only file_analysis::Manager should be creating these.
*/
Info(const string& unique, Connection* conn = 0);
Info(const string& unique, Connection* conn = 0,
AnalyzerTag::Tag tag = AnalyzerTag::Error);
/**
* Updates the "conn_ids" and "conn_uids" fields in #val record with the
@ -156,7 +158,6 @@ protected:
FileID file_id; /**< A pretty hash that likely identifies file*/
string unique; /**< A string that uniquely identifies file */
RecordVal* val; /**< \c FileAnalysis::Info from script layer. */
double last_activity_time; /**< Time of last activity. */
bool postpone_timeout; /**< Whether postponing timeout is requested. */
bool need_reassembly; /**< Whether file stream reassembly is needed. */
bool done; /**< If this object is about to be deleted. */
@ -192,8 +193,8 @@ public:
static int file_id_idx;
static int parent_file_id_idx;
static int source_idx;
static int conn_uids_idx;
static int conn_ids_idx;
static int conns_idx;
static int last_active_idx;
static int seen_bytes_idx;
static int total_bytes_idx;
static int missing_bytes_idx;

View file

@ -108,7 +108,7 @@ void Manager::Terminate()
}
bool Manager::DataIn(const u_char* data, uint64 len, uint64 offset,
Connection* conn, bool is_orig)
AnalyzerTag::Tag tag, Connection* conn, bool is_orig)
{
DrainPending();
@ -116,12 +116,12 @@ bool Manager::DataIn(const u_char* data, uint64 len, uint64 offset,
if ( ! unique.empty() )
{
DataIn(data, len, offset, GetInfo(unique, conn));
DataIn(data, len, offset, GetInfo(unique, conn, tag));
return true;
}
if ( ! is_draining )
pending.push_back(new PendingDataInChunk(data, len, offset, conn,
pending.push_back(new PendingDataInChunk(data, len, offset, tag, conn,
is_orig));
return false;
@ -146,8 +146,8 @@ void Manager::DataIn(const u_char* data, uint64 len, uint64 offset,
RemoveFile(info->GetUnique());
}
bool Manager::DataIn(const u_char* data, uint64 len, Connection* conn,
bool is_orig)
bool Manager::DataIn(const u_char* data, uint64 len, AnalyzerTag::Tag tag,
Connection* conn, bool is_orig)
{
DrainPending();
@ -155,12 +155,13 @@ bool Manager::DataIn(const u_char* data, uint64 len, Connection* conn,
if ( ! unique.empty() )
{
DataIn(data, len, GetInfo(unique, conn));
DataIn(data, len, GetInfo(unique, conn, tag));
return true;
}
if ( ! is_draining )
pending.push_back(new PendingDataInStream(data, len, conn, is_orig));
pending.push_back(new PendingDataInStream(data, len, tag, conn,
is_orig));
return false;
}
@ -212,7 +213,8 @@ void Manager::EndOfFile(const string& unique)
RemoveFile(unique);
}
bool Manager::Gap(uint64 offset, uint64 len, Connection* conn, bool is_orig)
bool Manager::Gap(uint64 offset, uint64 len, AnalyzerTag::Tag tag,
Connection* conn, bool is_orig)
{
DrainPending();
@ -220,12 +222,12 @@ bool Manager::Gap(uint64 offset, uint64 len, Connection* conn, bool is_orig)
if ( ! unique.empty() )
{
Gap(offset, len, GetInfo(unique, conn));
Gap(offset, len, GetInfo(unique, conn, tag));
return true;
}
if ( ! is_draining )
pending.push_back(new PendingGap(offset, len, conn, is_orig));
pending.push_back(new PendingGap(offset, len, tag, conn, is_orig));
return false;
}
@ -244,7 +246,8 @@ void Manager::Gap(uint64 offset, uint64 len, Info* info)
info->Gap(offset, len);
}
bool Manager::SetSize(uint64 size, Connection* conn, bool is_orig)
bool Manager::SetSize(uint64 size, AnalyzerTag::Tag tag, Connection* conn,
bool is_orig)
{
DrainPending();
@ -252,12 +255,12 @@ bool Manager::SetSize(uint64 size, Connection* conn, bool is_orig)
if ( ! unique.empty() )
{
SetSize(size, GetInfo(unique, conn));
SetSize(size, GetInfo(unique, conn, tag));
return true;
}
if ( ! is_draining )
pending.push_back(new PendingSize(size, conn, is_orig));
pending.push_back(new PendingSize(size, tag, conn, is_orig));
return false;
}
@ -326,7 +329,8 @@ bool Manager::RemoveAction(const FileID& file_id, const RecordVal* args) const
return info->RemoveAction(args);
}
Info* Manager::GetInfo(const string& unique, Connection* conn)
Info* Manager::GetInfo(const string& unique, Connection* conn,
AnalyzerTag::Tag tag)
{
if ( IsIgnored(unique) ) return 0;
@ -334,7 +338,7 @@ Info* Manager::GetInfo(const string& unique, Connection* conn)
if ( ! rval )
{
rval = str_map[unique] = new Info(unique, conn);
rval = str_map[unique] = new Info(unique, conn, tag);
FileID id = rval->GetFileID();
if ( id_map[id] )

View file

@ -7,6 +7,7 @@
#include <list>
#include "Net.h"
#include "AnalyzerTags.h"
#include "Conn.h"
#include "Val.h"
#include "Analyzer.h"
@ -47,7 +48,7 @@ public:
* Pass in non-sequential file data.
*/
bool DataIn(const u_char* data, uint64 len, uint64 offset,
Connection* conn, bool is_orig);
AnalyzerTag::Tag tag, Connection* conn, bool is_orig);
void DataIn(const u_char* data, uint64 len, uint64 offset,
const string& unique);
void DataIn(const u_char* data, uint64 len, uint64 offset,
@ -56,7 +57,8 @@ public:
/**
* Pass in sequential file data.
*/
bool DataIn(const u_char* data, uint64 len, Connection* conn, bool is_orig);
bool DataIn(const u_char* data, uint64 len, AnalyzerTag::Tag tag,
Connection* conn, bool is_orig);
void DataIn(const u_char* data, uint64 len, const string& unique);
void DataIn(const u_char* data, uint64 len, Info* info);
@ -70,14 +72,16 @@ public:
/**
* Signal a gap in the file data stream.
*/
bool Gap(uint64 offset, uint64 len, Connection* conn, bool is_orig);
bool Gap(uint64 offset, uint64 len, AnalyzerTag::Tag tag, Connection* conn,
bool is_orig);
void Gap(uint64 offset, uint64 len, const string& unique);
void Gap(uint64 offset, uint64 len, Info* info);
/**
* Provide the expected number of bytes that comprise a file.
*/
bool SetSize(uint64 size, Connection* conn, bool is_orig);
bool SetSize(uint64 size, AnalyzerTag::Tag tag, Connection* conn,
bool is_orig);
void SetSize(uint64 size, const string& unique);
void SetSize(uint64 size, Info* info);
@ -131,7 +135,8 @@ protected:
* activity time is refreshed along with any connection-related
* fields.
*/
Info* GetInfo(const string& unique, Connection* conn = 0);
Info* GetInfo(const string& unique, Connection* conn = 0,
AnalyzerTag::Tag tag = AnalyzerTag::Error);
/**
* @return a string which can uniquely identify the file being transported

View file

@ -20,8 +20,10 @@ static string conn_str(Connection* c)
return rval;
}
PendingFile::PendingFile(Connection* arg_conn, bool arg_is_orig)
: conn(arg_conn), is_orig(arg_is_orig), creation_time(network_time)
PendingFile::PendingFile(Connection* arg_conn, bool arg_is_orig,
AnalyzerTag::Tag arg_tag)
: conn(arg_conn), is_orig(arg_is_orig), creation_time(network_time),
tag(arg_tag)
{
Ref(conn);
DBG_LOG(DBG_FILE_ANALYSIS, "New pending file: %s", conn_str(conn).c_str());
@ -47,16 +49,18 @@ bool PendingFile::IsStale() const
}
PendingDataInChunk::PendingDataInChunk(const u_char* arg_data, uint64 arg_len,
uint64 arg_offset, Connection* arg_conn,
bool arg_is_orig)
: PendingFile(arg_conn, arg_is_orig), len(arg_len), offset(arg_offset)
uint64 arg_offset,
AnalyzerTag::Tag arg_tag,
Connection* arg_conn, bool arg_is_orig)
: PendingFile(arg_conn, arg_is_orig, arg_tag), len(arg_len),
offset(arg_offset)
{
copy_data(&data, arg_data, len);
}
bool PendingDataInChunk::Retry() const
{
return file_mgr->DataIn(data, len, offset, conn, is_orig);
return file_mgr->DataIn(data, len, offset, tag, conn, is_orig);
}
PendingDataInChunk::~PendingDataInChunk()
@ -65,15 +69,16 @@ PendingDataInChunk::~PendingDataInChunk()
}
PendingDataInStream::PendingDataInStream(const u_char* arg_data, uint64 arg_len,
Connection* arg_conn, bool arg_is_orig)
: PendingFile(arg_conn, arg_is_orig), len(arg_len)
AnalyzerTag::Tag arg_tag,
Connection* arg_conn, bool arg_is_orig)
: PendingFile(arg_conn, arg_is_orig, arg_tag), len(arg_len)
{
copy_data(&data, arg_data, len);
}
bool PendingDataInStream::Retry() const
{
return file_mgr->DataIn(data, len, conn, is_orig);
return file_mgr->DataIn(data, len, tag, conn, is_orig);
}
PendingDataInStream::~PendingDataInStream()
@ -81,15 +86,17 @@ PendingDataInStream::~PendingDataInStream()
delete [] data;
}
PendingGap::PendingGap(uint64 arg_offset, uint64 arg_len, Connection* arg_conn,
PendingGap::PendingGap(uint64 arg_offset, uint64 arg_len,
AnalyzerTag::Tag arg_tag, Connection* arg_conn,
bool arg_is_orig)
: PendingFile(arg_conn, arg_is_orig), offset(arg_offset), len(arg_len)
: PendingFile(arg_conn, arg_is_orig, arg_tag), offset(arg_offset),
len(arg_len)
{
}
bool PendingGap::Retry() const
{
return file_mgr->Gap(offset, len, conn, is_orig);
return file_mgr->Gap(offset, len, tag, conn, is_orig);
}
PendingEOF::PendingEOF(Connection* arg_conn, bool arg_is_orig)
@ -102,13 +109,13 @@ bool PendingEOF::Retry() const
return file_mgr->EndOfFile(conn, is_orig);
}
PendingSize::PendingSize(uint64 arg_size, Connection* arg_conn,
bool arg_is_orig)
: PendingFile(arg_conn, arg_is_orig), size(arg_size)
PendingSize::PendingSize(uint64 arg_size, AnalyzerTag::Tag arg_tag,
Connection* arg_conn, bool arg_is_orig)
: PendingFile(arg_conn, arg_is_orig, arg_tag), size(arg_size)
{
}
bool PendingSize::Retry() const
{
return file_mgr->SetSize(size, conn, is_orig);
return file_mgr->SetSize(size, tag, conn, is_orig);
}

View file

@ -1,6 +1,7 @@
#ifndef FILE_ANALYSIS_PENDINGFILE_H
#define FILE_ANALYSIS_PENDINGFILE_H
#include "AnalyzerTags.h"
#include "Conn.h"
namespace file_analysis {
@ -16,19 +17,21 @@ public:
protected:
PendingFile(Connection* arg_conn, bool arg_is_orig);
PendingFile(Connection* arg_conn, bool arg_is_orig,
AnalyzerTag::Tag arg_tag = AnalyzerTag::Error);
Connection* conn;
bool is_orig;
double creation_time;
AnalyzerTag::Tag tag;
};
class PendingDataInChunk : public PendingFile {
public:
PendingDataInChunk(const u_char* arg_data, uint64 arg_len,
uint64 arg_offset, Connection* arg_conn,
bool arg_is_orig);
uint64 arg_offset, AnalyzerTag::Tag tag,
Connection* arg_conn, bool arg_is_orig);
virtual ~PendingDataInChunk();
@ -45,7 +48,8 @@ class PendingDataInStream : public PendingFile {
public:
PendingDataInStream(const u_char* arg_data, uint64 arg_len,
Connection* arg_conn, bool arg_is_orig);
AnalyzerTag::Tag tag, Connection* arg_conn,
bool arg_is_orig);
virtual ~PendingDataInStream();
@ -60,8 +64,8 @@ protected:
class PendingGap : public PendingFile {
public:
PendingGap(uint64 arg_offset, uint64 arg_len, Connection* arg_conn,
bool arg_is_orig);
PendingGap(uint64 arg_offset, uint64 arg_len, AnalyzerTag::Tag tag,
Connection* arg_conn, bool arg_is_orig);
virtual bool Retry() const;
@ -82,7 +86,8 @@ public:
class PendingSize : public PendingFile {
public:
PendingSize(uint64 arg_size, Connection* arg_conn, bool arg_is_orig);
PendingSize(uint64 arg_size, AnalyzerTag::Tag tag, Connection* arg_conn,
bool arg_is_orig);
virtual bool Retry() const;