mirror of
https://github.com/zeek/zeek.git
synced 2025-10-02 14:48:21 +00:00
File analysis framework interface simplifications.
- Remove script-layer data input interface (will be managed directly by input framework later). - Only track files internally by file id hash. Chance of collision too small to justify also tracking unique file string.
This commit is contained in:
parent
e46300a724
commit
90fa331279
10 changed files with 125 additions and 357 deletions
|
@ -171,58 +171,6 @@ export {
|
|||
## rest of its contents, or false if analysis for the *id*
|
||||
## isn't currently active.
|
||||
global stop: function(f: fa_file): bool;
|
||||
|
||||
## Sends a sequential stream of data in for file analysis.
|
||||
## Meant for use when providing external file analysis input (e.g.
|
||||
## from the input framework).
|
||||
##
|
||||
## source: a string that uniquely identifies the logical file that the
|
||||
## data is a part of and describes its source.
|
||||
##
|
||||
## data: bytestring contents of the file to analyze.
|
||||
global data_stream: function(source: string, data: string);
|
||||
|
||||
## Sends a non-sequential chunk of data in for file analysis.
|
||||
## Meant for use when providing external file analysis input (e.g.
|
||||
## from the input framework).
|
||||
##
|
||||
## source: a string that uniquely identifies the logical file that the
|
||||
## data is a part of and describes its source.
|
||||
##
|
||||
## data: bytestring contents of the file to analyze.
|
||||
##
|
||||
## offset: the offset within the file that this chunk starts.
|
||||
global data_chunk: function(source: string, data: string, offset: count);
|
||||
|
||||
## Signals a content gap in the file bytestream.
|
||||
## Meant for use when providing external file analysis input (e.g.
|
||||
## from the input framework).
|
||||
##
|
||||
## source: a string that uniquely identifies the logical file that the
|
||||
## data is a part of and describes its source.
|
||||
##
|
||||
## offset: the offset within the file that this gap starts.
|
||||
##
|
||||
## len: the number of bytes that are missing.
|
||||
global gap: function(source: string, offset: count, len: count);
|
||||
|
||||
## Signals the total size of a file.
|
||||
## Meant for use when providing external file analysis input (e.g.
|
||||
## from the input framework).
|
||||
##
|
||||
## source: a string that uniquely identifies the logical file that the
|
||||
## data is a part of and describes its source.
|
||||
##
|
||||
## size: the number of bytes that comprise the full file.
|
||||
global set_size: function(source: string, size: count);
|
||||
|
||||
## Signals the end of a file.
|
||||
## Meant for use when providing external file analysis input (e.g.
|
||||
## from the input framework).
|
||||
##
|
||||
## source: a string that uniquely identifies the logical file that the
|
||||
## data is a part of and describes its source.
|
||||
global eof: function(source: string);
|
||||
}
|
||||
|
||||
redef record fa_file += {
|
||||
|
@ -287,31 +235,6 @@ function stop(f: fa_file): bool
|
|||
return __stop(f$id);
|
||||
}
|
||||
|
||||
function data_stream(source: string, data: string)
|
||||
{
|
||||
__data_stream(source, data);
|
||||
}
|
||||
|
||||
function data_chunk(source: string, data: string, offset: count)
|
||||
{
|
||||
__data_chunk(source, data, offset);
|
||||
}
|
||||
|
||||
function gap(source: string, offset: count, len: count)
|
||||
{
|
||||
__gap(source, offset, len);
|
||||
}
|
||||
|
||||
function set_size(source: string, size: count)
|
||||
{
|
||||
__set_size(source, size);
|
||||
}
|
||||
|
||||
function eof(source: string)
|
||||
{
|
||||
__eof(source);
|
||||
}
|
||||
|
||||
event bro_init() &priority=5
|
||||
{
|
||||
Log::create_stream(FileAnalysis::LOG,
|
||||
|
|
|
@ -464,7 +464,6 @@ set(bro_SRCS
|
|||
file_analysis/Manager.cc
|
||||
file_analysis/File.cc
|
||||
file_analysis/FileTimer.cc
|
||||
file_analysis/FileID.h
|
||||
file_analysis/Analyzer.h
|
||||
file_analysis/AnalyzerSet.cc
|
||||
file_analysis/Extract.cc
|
||||
|
|
|
@ -30,27 +30,23 @@ enum Analyzer %{
|
|||
## :bro:see:`FileAnalysis::postpone_timeout`.
|
||||
function FileAnalysis::__postpone_timeout%(file_id: string%): bool
|
||||
%{
|
||||
using file_analysis::FileID;
|
||||
bool result = file_mgr->PostponeTimeout(FileID(file_id->CheckString()));
|
||||
bool result = file_mgr->PostponeTimeout(file_id->CheckString());
|
||||
return new Val(result, TYPE_BOOL);
|
||||
%}
|
||||
|
||||
## :bro:see:`FileAnalysis::set_timeout_interval`.
|
||||
function FileAnalysis::__set_timeout_interval%(file_id: string, t: interval%): bool
|
||||
%{
|
||||
using file_analysis::FileID;
|
||||
bool result = file_mgr->SetTimeoutInterval(FileID(file_id->CheckString()),
|
||||
t);
|
||||
bool result = file_mgr->SetTimeoutInterval(file_id->CheckString(), t);
|
||||
return new Val(result, TYPE_BOOL);
|
||||
%}
|
||||
|
||||
## :bro:see:`FileAnalysis::add_analyzer`.
|
||||
function FileAnalysis::__add_analyzer%(file_id: string, args: any%): bool
|
||||
%{
|
||||
using file_analysis::FileID;
|
||||
using BifType::Record::FileAnalysis::AnalyzerArgs;
|
||||
RecordVal* rv = args->AsRecordVal()->CoerceTo(AnalyzerArgs);
|
||||
bool result = file_mgr->AddAnalyzer(FileID(file_id->CheckString()), rv);
|
||||
bool result = file_mgr->AddAnalyzer(file_id->CheckString(), rv);
|
||||
Unref(rv);
|
||||
return new Val(result, TYPE_BOOL);
|
||||
%}
|
||||
|
@ -58,10 +54,9 @@ function FileAnalysis::__add_analyzer%(file_id: string, args: any%): bool
|
|||
## :bro:see:`FileAnalysis::remove_analyzer`.
|
||||
function FileAnalysis::__remove_analyzer%(file_id: string, args: any%): bool
|
||||
%{
|
||||
using file_analysis::FileID;
|
||||
using BifType::Record::FileAnalysis::AnalyzerArgs;
|
||||
RecordVal* rv = args->AsRecordVal()->CoerceTo(AnalyzerArgs);
|
||||
bool result = file_mgr->RemoveAnalyzer(FileID(file_id->CheckString()), rv);
|
||||
bool result = file_mgr->RemoveAnalyzer(file_id->CheckString(), rv);
|
||||
Unref(rv);
|
||||
return new Val(result, TYPE_BOOL);
|
||||
%}
|
||||
|
@ -69,47 +64,10 @@ function FileAnalysis::__remove_analyzer%(file_id: string, args: any%): bool
|
|||
## :bro:see:`FileAnalysis::stop`.
|
||||
function FileAnalysis::__stop%(file_id: string%): bool
|
||||
%{
|
||||
using file_analysis::FileID;
|
||||
bool result = file_mgr->IgnoreFile(FileID(file_id->CheckString()));
|
||||
bool result = file_mgr->IgnoreFile(file_id->CheckString());
|
||||
return new Val(result, TYPE_BOOL);
|
||||
%}
|
||||
|
||||
## :bro:see:`FileAnalysis::data_stream`.
|
||||
function FileAnalysis::__data_stream%(source: string, data: string%): any
|
||||
%{
|
||||
file_mgr->DataIn(data->Bytes(), data->Len(), source->CheckString());
|
||||
return 0;
|
||||
%}
|
||||
|
||||
## :bro:see:`FileAnalysis::data_chunk`.
|
||||
function FileAnalysis::__data_chunk%(source: string, data: string,
|
||||
offset: count%): any
|
||||
%{
|
||||
file_mgr->DataIn(data->Bytes(), data->Len(), offset, source->CheckString());
|
||||
return 0;
|
||||
%}
|
||||
|
||||
## :bro:see:`FileAnalysis::gap`.
|
||||
function FileAnalysis::__gap%(source: string, offset: count, len: count%): any
|
||||
%{
|
||||
file_mgr->Gap(offset, len, source->CheckString());
|
||||
return 0;
|
||||
%}
|
||||
|
||||
## :bro:see:`FileAnalysis::set_size`.
|
||||
function FileAnalysis::__set_size%(source: string, size: count%): any
|
||||
%{
|
||||
file_mgr->SetSize(size, source->CheckString());
|
||||
return 0;
|
||||
%}
|
||||
|
||||
## :bro:see:`FileAnalysis::eof`.
|
||||
function FileAnalysis::__eof%(source: string%): any
|
||||
%{
|
||||
file_mgr->EndOfFile(source->CheckString());
|
||||
return 0;
|
||||
%}
|
||||
|
||||
module GLOBAL;
|
||||
|
||||
## For use within a :bro:see:`get_file_handle` handler to set a unique
|
||||
|
|
|
@ -1,11 +1,9 @@
|
|||
// See the file "COPYING" in the main distribution directory for copyright.
|
||||
|
||||
#include <string>
|
||||
#include <openssl/md5.h>
|
||||
|
||||
#include "File.h"
|
||||
#include "FileTimer.h"
|
||||
#include "FileID.h"
|
||||
#include "Analyzer.h"
|
||||
#include "Manager.h"
|
||||
#include "Reporter.h"
|
||||
|
@ -51,8 +49,6 @@ int File::bof_buffer_size_idx = -1;
|
|||
int File::bof_buffer_idx = -1;
|
||||
int File::mime_type_idx = -1;
|
||||
|
||||
string File::salt;
|
||||
|
||||
void File::StaticInit()
|
||||
{
|
||||
if ( id_idx != -1 )
|
||||
|
@ -72,31 +68,19 @@ void File::StaticInit()
|
|||
bof_buffer_size_idx = Idx("bof_buffer_size");
|
||||
bof_buffer_idx = Idx("bof_buffer");
|
||||
mime_type_idx = Idx("mime_type");
|
||||
|
||||
salt = BifConst::FileAnalysis::salt->CheckString();
|
||||
}
|
||||
|
||||
File::File(const string& unique, Connection* conn, AnalyzerTag::Tag tag,
|
||||
File::File(const string& file_id, Connection* conn, AnalyzerTag::Tag tag,
|
||||
bool is_orig)
|
||||
: id(""), unique(unique), val(0), postpone_timeout(false),
|
||||
first_chunk(true), missed_bof(false), need_reassembly(false), done(false),
|
||||
analyzers(this)
|
||||
: id(file_id), val(0), postpone_timeout(false), first_chunk(true),
|
||||
missed_bof(false), need_reassembly(false), done(false), analyzers(this)
|
||||
{
|
||||
StaticInit();
|
||||
|
||||
char tmp[20];
|
||||
uint64 hash[2];
|
||||
string msg(unique + salt);
|
||||
MD5(reinterpret_cast<const u_char*>(msg.data()), msg.size(),
|
||||
reinterpret_cast<u_char*>(hash));
|
||||
uitoa_n(hash[0], tmp, sizeof(tmp), 62);
|
||||
|
||||
DBG_LOG(DBG_FILE_ANALYSIS, "Creating new File object %s (%s)", tmp,
|
||||
unique.c_str());
|
||||
DBG_LOG(DBG_FILE_ANALYSIS, "Creating new File object %s", file_id.c_str());
|
||||
|
||||
val = new RecordVal(fa_file_type);
|
||||
val->Assign(id_idx, new StringVal(tmp));
|
||||
id = FileID(tmp);
|
||||
val->Assign(id_idx, new StringVal(file_id.c_str()));
|
||||
|
||||
if ( conn )
|
||||
{
|
||||
|
@ -106,8 +90,9 @@ File::File(const string& unique, Connection* conn, AnalyzerTag::Tag tag,
|
|||
UpdateConnectionFields(conn);
|
||||
}
|
||||
else
|
||||
// use the unique file handle as source
|
||||
val->Assign(source_idx, new StringVal(unique.c_str()));
|
||||
{
|
||||
// TODO: what to use as source field? (input framework interface)
|
||||
}
|
||||
|
||||
UpdateLastActivityTime();
|
||||
}
|
||||
|
@ -423,7 +408,7 @@ void File::Gap(uint64 offset, uint64 len)
|
|||
|
||||
bool File::FileEventAvailable(EventHandlerPtr h)
|
||||
{
|
||||
return h && ! file_mgr->IsIgnored(unique);
|
||||
return h && ! file_mgr->IsIgnored(id);
|
||||
}
|
||||
|
||||
void File::FileEvent(EventHandlerPtr h)
|
||||
|
|
|
@ -10,7 +10,6 @@
|
|||
#include "Conn.h"
|
||||
#include "Val.h"
|
||||
#include "AnalyzerSet.h"
|
||||
#include "FileID.h"
|
||||
#include "BroString.h"
|
||||
|
||||
namespace file_analysis {
|
||||
|
@ -40,12 +39,7 @@ public:
|
|||
/**
|
||||
* @return value of the "id" field from #val record.
|
||||
*/
|
||||
FileID GetID() const { return id; }
|
||||
|
||||
/**
|
||||
* @return the string which uniquely identifies the file.
|
||||
*/
|
||||
string GetUnique() const { return unique; }
|
||||
string GetID() const { return id; }
|
||||
|
||||
/**
|
||||
* @return value of "last_active" field in #val record;
|
||||
|
@ -131,7 +125,7 @@ protected:
|
|||
/**
|
||||
* Constructor; only file_analysis::Manager should be creating these.
|
||||
*/
|
||||
File(const string& unique, Connection* conn = 0,
|
||||
File(const string& file_id, Connection* conn = 0,
|
||||
AnalyzerTag::Tag tag = AnalyzerTag::Error, bool is_orig = false);
|
||||
|
||||
/**
|
||||
|
@ -186,8 +180,7 @@ protected:
|
|||
static void StaticInit();
|
||||
|
||||
private:
|
||||
FileID id; /**< A pretty hash that likely identifies file */
|
||||
string unique; /**< A string that uniquely identifies file */
|
||||
string id; /**< A pretty hash that likely identifies file */
|
||||
RecordVal* val; /**< \c fa_file from script layer. */
|
||||
bool postpone_timeout; /**< Whether postponing timeout is requested. */
|
||||
bool first_chunk; /**< Track first non-linear chunk. */
|
||||
|
@ -207,8 +200,6 @@ private:
|
|||
BroString::CVec chunks;
|
||||
} bof_buffer; /**< Beginning of file buffer. */
|
||||
|
||||
static string salt;
|
||||
|
||||
static int id_idx;
|
||||
static int parent_id_idx;
|
||||
static int source_idx;
|
||||
|
|
|
@ -1,34 +0,0 @@
|
|||
// See the file "COPYING" in the main distribution directory for copyright.
|
||||
|
||||
#ifndef FILE_ANALYSIS_FILEID_H
|
||||
#define FILE_ANALYSIS_FILEID_H
|
||||
|
||||
namespace file_analysis {
|
||||
|
||||
/**
|
||||
* A simple string wrapper class to help enforce some type safety between
|
||||
* methods of FileAnalysis::Manager, some of which use a unique string to
|
||||
* identify files, and others which use a pretty hash (the FileID) to identify
|
||||
* files. A FileID is primarily used in methods which interface with the
|
||||
* script-layer, while the unique strings are used for methods which interface
|
||||
* with protocol analyzers or anything that sends data to the file analysis
|
||||
* framework.
|
||||
*/
|
||||
struct FileID {
|
||||
string id;
|
||||
|
||||
explicit FileID(const string arg_id) : id(arg_id) {}
|
||||
FileID(const FileID& other) : id(other.id) {}
|
||||
|
||||
const char* c_str() const { return id.c_str(); }
|
||||
|
||||
bool operator==(const FileID& rhs) const { return id == rhs.id; }
|
||||
bool operator<(const FileID& rhs) const { return id < rhs.id; }
|
||||
|
||||
FileID& operator=(const FileID& rhs) { id = rhs.id; return *this; }
|
||||
FileID& operator=(const string& rhs) { id = rhs; return *this; }
|
||||
};
|
||||
|
||||
} // namespace file_analysis
|
||||
|
||||
#endif
|
|
@ -5,7 +5,7 @@
|
|||
|
||||
using namespace file_analysis;
|
||||
|
||||
FileTimer::FileTimer(double t, const FileID& id, double interval)
|
||||
FileTimer::FileTimer(double t, const string& id, double interval)
|
||||
: Timer(t + interval, TIMER_FILE_ANALYSIS_INACTIVITY), file_id(id)
|
||||
{
|
||||
DBG_LOG(DBG_FILE_ANALYSIS, "New %f second timeout timer for %s",
|
||||
|
|
|
@ -5,7 +5,6 @@
|
|||
|
||||
#include <string>
|
||||
#include "Timer.h"
|
||||
#include "FileID.h"
|
||||
|
||||
namespace file_analysis {
|
||||
|
||||
|
@ -14,7 +13,7 @@ namespace file_analysis {
|
|||
*/
|
||||
class FileTimer : public Timer {
|
||||
public:
|
||||
FileTimer(double t, const FileID& id, double interval);
|
||||
FileTimer(double t, const string& id, double interval);
|
||||
|
||||
/**
|
||||
* Check inactivity of file_analysis::File corresponding to #file_id,
|
||||
|
@ -23,7 +22,7 @@ public:
|
|||
void Dispatch(double t, int is_expire);
|
||||
|
||||
private:
|
||||
FileID file_id;
|
||||
string file_id;
|
||||
};
|
||||
|
||||
} // namespace file_analysis
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <openssl/md5.h>
|
||||
|
||||
#include "Manager.h"
|
||||
#include "File.h"
|
||||
|
@ -24,7 +25,7 @@ Manager::~Manager()
|
|||
|
||||
void Manager::Terminate()
|
||||
{
|
||||
vector<FileID> keys;
|
||||
vector<string> keys;
|
||||
for ( IDMap::iterator it = id_map.begin(); it != id_map.end(); ++it )
|
||||
keys.push_back(it->first);
|
||||
|
||||
|
@ -32,66 +33,60 @@ void Manager::Terminate()
|
|||
Timeout(keys[i], true);
|
||||
}
|
||||
|
||||
string Manager::HashHandle(const string& handle) const
|
||||
{
|
||||
static string salt;
|
||||
|
||||
if ( salt.empty() )
|
||||
salt = BifConst::FileAnalysis::salt->CheckString();
|
||||
|
||||
char tmp[20];
|
||||
uint64 hash[2];
|
||||
string msg(handle + salt);
|
||||
|
||||
MD5(reinterpret_cast<const u_char*>(msg.data()), msg.size(),
|
||||
reinterpret_cast<u_char*>(hash));
|
||||
uitoa_n(hash[0], tmp, sizeof(tmp), 62);
|
||||
|
||||
return tmp;
|
||||
}
|
||||
|
||||
void Manager::SetHandle(const string& handle)
|
||||
{
|
||||
current_handle = handle;
|
||||
if ( handle.empty() )
|
||||
return;
|
||||
|
||||
current_file_id = HashHandle(handle);
|
||||
}
|
||||
|
||||
void Manager::DataIn(const u_char* data, uint64 len, uint64 offset,
|
||||
AnalyzerTag::Tag tag, Connection* conn, bool is_orig)
|
||||
{
|
||||
if ( IsDisabled(tag) )
|
||||
return;
|
||||
File* file = GetFile(conn, tag, is_orig);
|
||||
|
||||
GetFileHandle(tag, conn, is_orig);
|
||||
DataIn(data, len, offset, GetFile(current_handle, conn, tag, is_orig));
|
||||
}
|
||||
|
||||
void Manager::DataIn(const u_char* data, uint64 len, uint64 offset,
|
||||
const string& unique)
|
||||
{
|
||||
DataIn(data, len, offset, GetFile(unique));
|
||||
}
|
||||
|
||||
void Manager::DataIn(const u_char* data, uint64 len, uint64 offset,
|
||||
File* file)
|
||||
{
|
||||
if ( ! file )
|
||||
return;
|
||||
|
||||
file->DataIn(data, len, offset);
|
||||
|
||||
if ( file->IsComplete() )
|
||||
RemoveFile(file->GetUnique());
|
||||
RemoveFile(file->GetID());
|
||||
}
|
||||
|
||||
void Manager::DataIn(const u_char* data, uint64 len, AnalyzerTag::Tag tag,
|
||||
Connection* conn, bool is_orig)
|
||||
{
|
||||
if ( IsDisabled(tag) )
|
||||
return;
|
||||
|
||||
GetFileHandle(tag, conn, is_orig);
|
||||
|
||||
// Sequential data input shouldn't be going over multiple conns, so don't
|
||||
// do the check to update connection set.
|
||||
DataIn(data, len, GetFile(current_handle, conn, tag, is_orig, false));
|
||||
}
|
||||
File* file = GetFile(conn, tag, is_orig, false);
|
||||
|
||||
void Manager::DataIn(const u_char* data, uint64 len, const string& unique)
|
||||
{
|
||||
DataIn(data, len, GetFile(unique));
|
||||
}
|
||||
|
||||
void Manager::DataIn(const u_char* data, uint64 len, File* file)
|
||||
{
|
||||
if ( ! file )
|
||||
return;
|
||||
|
||||
file->DataIn(data, len);
|
||||
|
||||
if ( file->IsComplete() )
|
||||
RemoveFile(file->GetUnique());
|
||||
RemoveFile(file->GetID());
|
||||
}
|
||||
|
||||
void Manager::EndOfFile(AnalyzerTag::Tag tag, Connection* conn)
|
||||
|
@ -102,35 +97,16 @@ void Manager::EndOfFile(AnalyzerTag::Tag tag, Connection* conn)
|
|||
|
||||
void Manager::EndOfFile(AnalyzerTag::Tag tag, Connection* conn, bool is_orig)
|
||||
{
|
||||
if ( IsDisabled(tag) )
|
||||
return;
|
||||
|
||||
// Don't need to create a file if we're just going to remove it right away.
|
||||
GetFileHandle(tag, conn, is_orig);
|
||||
EndOfFile(current_handle);
|
||||
}
|
||||
|
||||
void Manager::EndOfFile(const string& unique)
|
||||
{
|
||||
RemoveFile(unique);
|
||||
RemoveFile(current_file_id);
|
||||
}
|
||||
|
||||
void Manager::Gap(uint64 offset, uint64 len, AnalyzerTag::Tag tag,
|
||||
Connection* conn, bool is_orig)
|
||||
{
|
||||
if ( IsDisabled(tag) )
|
||||
return;
|
||||
File* file = GetFile(conn, tag, is_orig);
|
||||
|
||||
GetFileHandle(tag, conn, is_orig);
|
||||
Gap(offset, len, GetFile(current_handle, conn, tag, is_orig));
|
||||
}
|
||||
|
||||
void Manager::Gap(uint64 offset, uint64 len, const string& unique)
|
||||
{
|
||||
Gap(offset, len, GetFile(unique));
|
||||
}
|
||||
|
||||
void Manager::Gap(uint64 offset, uint64 len, File* file)
|
||||
{
|
||||
if ( ! file )
|
||||
return;
|
||||
|
||||
|
@ -140,30 +116,18 @@ void Manager::Gap(uint64 offset, uint64 len, File* file)
|
|||
void Manager::SetSize(uint64 size, AnalyzerTag::Tag tag, Connection* conn,
|
||||
bool is_orig)
|
||||
{
|
||||
if ( IsDisabled(tag) )
|
||||
return;
|
||||
File* file = GetFile(conn, tag, is_orig);
|
||||
|
||||
GetFileHandle(tag, conn, is_orig);
|
||||
SetSize(size, GetFile(current_handle, conn, tag, is_orig));
|
||||
}
|
||||
|
||||
void Manager::SetSize(uint64 size, const string& unique)
|
||||
{
|
||||
SetSize(size, GetFile(unique));
|
||||
}
|
||||
|
||||
void Manager::SetSize(uint64 size, File* file)
|
||||
{
|
||||
if ( ! file )
|
||||
return;
|
||||
|
||||
file->SetTotalBytes(size);
|
||||
|
||||
if ( file->IsComplete() )
|
||||
RemoveFile(file->GetUnique());
|
||||
RemoveFile(file->GetID());
|
||||
}
|
||||
|
||||
bool Manager::PostponeTimeout(const FileID& file_id) const
|
||||
bool Manager::PostponeTimeout(const string& file_id) const
|
||||
{
|
||||
File* file = Lookup(file_id);
|
||||
|
||||
|
@ -174,7 +138,7 @@ bool Manager::PostponeTimeout(const FileID& file_id) const
|
|||
return true;
|
||||
}
|
||||
|
||||
bool Manager::SetTimeoutInterval(const FileID& file_id, double interval) const
|
||||
bool Manager::SetTimeoutInterval(const string& file_id, double interval) const
|
||||
{
|
||||
File* file = Lookup(file_id);
|
||||
|
||||
|
@ -185,7 +149,7 @@ bool Manager::SetTimeoutInterval(const FileID& file_id, double interval) const
|
|||
return true;
|
||||
}
|
||||
|
||||
bool Manager::AddAnalyzer(const FileID& file_id, RecordVal* args) const
|
||||
bool Manager::AddAnalyzer(const string& file_id, RecordVal* args) const
|
||||
{
|
||||
File* file = Lookup(file_id);
|
||||
|
||||
|
@ -195,7 +159,7 @@ bool Manager::AddAnalyzer(const FileID& file_id, RecordVal* args) const
|
|||
return file->AddAnalyzer(args);
|
||||
}
|
||||
|
||||
bool Manager::RemoveAnalyzer(const FileID& file_id, const RecordVal* args) const
|
||||
bool Manager::RemoveAnalyzer(const string& file_id, const RecordVal* args) const
|
||||
{
|
||||
File* file = Lookup(file_id);
|
||||
|
||||
|
@ -205,32 +169,27 @@ bool Manager::RemoveAnalyzer(const FileID& file_id, const RecordVal* args) const
|
|||
return file->RemoveAnalyzer(args);
|
||||
}
|
||||
|
||||
File* Manager::GetFile(const string& unique, Connection* conn,
|
||||
AnalyzerTag::Tag tag, bool is_orig, bool update_conn)
|
||||
File* Manager::GetFile(Connection* conn, AnalyzerTag::Tag tag, bool is_orig,
|
||||
bool update_conn)
|
||||
{
|
||||
if ( unique.empty() )
|
||||
// sets current_file_id for us
|
||||
GetFileHandle(tag, conn, is_orig);
|
||||
|
||||
if ( current_file_id.empty() )
|
||||
return 0;
|
||||
|
||||
if ( IsIgnored(unique) )
|
||||
if ( IsIgnored(current_file_id) )
|
||||
return 0;
|
||||
|
||||
File* rval = str_map[unique];
|
||||
File* rval = id_map[current_file_id];
|
||||
|
||||
if ( ! rval )
|
||||
{
|
||||
rval = str_map[unique] = new File(unique, conn, tag, is_orig);
|
||||
FileID id = rval->GetID();
|
||||
|
||||
if ( id_map[id] )
|
||||
{
|
||||
reporter->Error("Evicted duplicate file ID: %s", id.c_str());
|
||||
RemoveFile(unique);
|
||||
}
|
||||
|
||||
id_map[id] = rval;
|
||||
rval = id_map[current_file_id] = new File(current_file_id, conn, tag,
|
||||
is_orig);
|
||||
rval->ScheduleInactivityTimer();
|
||||
|
||||
if ( IsIgnored(unique) )
|
||||
if ( IsIgnored(current_file_id) )
|
||||
return 0;
|
||||
}
|
||||
else
|
||||
|
@ -244,7 +203,7 @@ File* Manager::GetFile(const string& unique, Connection* conn,
|
|||
return rval;
|
||||
}
|
||||
|
||||
File* Manager::Lookup(const FileID& file_id) const
|
||||
File* Manager::Lookup(const string& file_id) const
|
||||
{
|
||||
IDMap::const_iterator it = id_map.find(file_id);
|
||||
|
||||
|
@ -254,7 +213,7 @@ File* Manager::Lookup(const FileID& file_id) const
|
|||
return it->second;
|
||||
}
|
||||
|
||||
void Manager::Timeout(const FileID& file_id, bool is_terminating)
|
||||
void Manager::Timeout(const string& file_id, bool is_terminating)
|
||||
{
|
||||
File* file = Lookup(file_id);
|
||||
|
||||
|
@ -277,53 +236,50 @@ void Manager::Timeout(const FileID& file_id, bool is_terminating)
|
|||
DBG_LOG(DBG_FILE_ANALYSIS, "File analysis timeout for %s",
|
||||
file->GetID().c_str());
|
||||
|
||||
RemoveFile(file->GetUnique());
|
||||
RemoveFile(file->GetID());
|
||||
}
|
||||
|
||||
bool Manager::IgnoreFile(const FileID& file_id)
|
||||
bool Manager::IgnoreFile(const string& file_id)
|
||||
{
|
||||
if ( id_map.find(file_id) == id_map.end() )
|
||||
return false;
|
||||
|
||||
DBG_LOG(DBG_FILE_ANALYSIS, "Ignore FileID %s", file_id.c_str());
|
||||
|
||||
ignored.insert(file_id);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Manager::RemoveFile(const string& file_id)
|
||||
{
|
||||
IDMap::iterator it = id_map.find(file_id);
|
||||
|
||||
if ( it == id_map.end() )
|
||||
return false;
|
||||
|
||||
DBG_LOG(DBG_FILE_ANALYSIS, "Ignore FileID %s", file_id.c_str());
|
||||
|
||||
ignored.insert(it->second->GetUnique());
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Manager::RemoveFile(const string& unique)
|
||||
{
|
||||
StrMap::iterator it = str_map.find(unique);
|
||||
|
||||
if ( it == str_map.end() )
|
||||
return false;
|
||||
DBG_LOG(DBG_FILE_ANALYSIS, "Remove FileID %s", file_id.c_str());
|
||||
|
||||
it->second->EndOfFile();
|
||||
|
||||
FileID id = it->second->GetID();
|
||||
|
||||
DBG_LOG(DBG_FILE_ANALYSIS, "Remove FileID %s", id.c_str());
|
||||
|
||||
if ( ! id_map.erase(id) )
|
||||
reporter->Error("No mapping for fileID %s", id.c_str());
|
||||
|
||||
ignored.erase(unique);
|
||||
delete it->second;
|
||||
str_map.erase(unique);
|
||||
id_map.erase(file_id);
|
||||
ignored.erase(file_id);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Manager::IsIgnored(const string& unique)
|
||||
bool Manager::IsIgnored(const string& file_id)
|
||||
{
|
||||
return ignored.find(unique) != ignored.end();
|
||||
return ignored.find(file_id) != ignored.end();
|
||||
}
|
||||
|
||||
void Manager::GetFileHandle(AnalyzerTag::Tag tag, Connection* c, bool is_orig)
|
||||
{
|
||||
current_handle.clear();
|
||||
current_file_id.clear();
|
||||
|
||||
if ( IsDisabled(tag) )
|
||||
return;
|
||||
|
||||
if ( ! get_file_handle )
|
||||
return;
|
||||
|
|
|
@ -18,7 +18,6 @@
|
|||
|
||||
#include "File.h"
|
||||
#include "FileTimer.h"
|
||||
#include "FileID.h"
|
||||
|
||||
namespace file_analysis {
|
||||
|
||||
|
@ -36,7 +35,12 @@ public:
|
|||
void Terminate();
|
||||
|
||||
/**
|
||||
* Take in a unique file handle string to identifiy incoming file data.
|
||||
* @return a prettified MD5 hash of \a handle, truncated to 64-bits.
|
||||
*/
|
||||
string HashHandle(const string& handle) const;
|
||||
|
||||
/**
|
||||
* Take in a unique file handle string to identify incoming file data.
|
||||
*/
|
||||
void SetHandle(const string& handle);
|
||||
|
||||
|
@ -45,59 +49,48 @@ public:
|
|||
*/
|
||||
void DataIn(const u_char* data, uint64 len, uint64 offset,
|
||||
AnalyzerTag::Tag tag, Connection* conn, bool is_orig);
|
||||
void DataIn(const u_char* data, uint64 len, uint64 offset,
|
||||
const string& unique);
|
||||
void DataIn(const u_char* data, uint64 len, uint64 offset,
|
||||
File* file);
|
||||
|
||||
/**
|
||||
* Pass in sequential file data.
|
||||
*/
|
||||
void DataIn(const u_char* data, uint64 len, AnalyzerTag::Tag tag,
|
||||
Connection* conn, bool is_orig);
|
||||
void DataIn(const u_char* data, uint64 len, const string& unique);
|
||||
void DataIn(const u_char* data, uint64 len, File* file);
|
||||
|
||||
/**
|
||||
* Signal the end of file data.
|
||||
*/
|
||||
void EndOfFile(AnalyzerTag::Tag tag, Connection* conn);
|
||||
void EndOfFile(AnalyzerTag::Tag tag, Connection* conn, bool is_orig);
|
||||
void EndOfFile(const string& unique);
|
||||
|
||||
/**
|
||||
* Signal a gap in the file data stream.
|
||||
*/
|
||||
void Gap(uint64 offset, uint64 len, AnalyzerTag::Tag tag, Connection* conn,
|
||||
bool is_orig);
|
||||
void Gap(uint64 offset, uint64 len, const string& unique);
|
||||
void Gap(uint64 offset, uint64 len, File* file);
|
||||
|
||||
/**
|
||||
* Provide the expected number of bytes that comprise a file.
|
||||
*/
|
||||
void SetSize(uint64 size, AnalyzerTag::Tag tag, Connection* conn,
|
||||
bool is_orig);
|
||||
void SetSize(uint64 size, const string& unique);
|
||||
void SetSize(uint64 size, File* file);
|
||||
|
||||
/**
|
||||
* Starts ignoring a file, which will finally be removed from internal
|
||||
* mappings on EOF or TIMEOUT.
|
||||
* @return false if file identifier did not map to anything, else true.
|
||||
*/
|
||||
bool IgnoreFile(const FileID& file_id);
|
||||
bool IgnoreFile(const string& file_id);
|
||||
|
||||
/**
|
||||
* If called during a \c file_timeout event handler, requests deferral of
|
||||
* analysis timeout.
|
||||
*/
|
||||
bool PostponeTimeout(const FileID& file_id) const;
|
||||
bool PostponeTimeout(const string& file_id) const;
|
||||
|
||||
/**
|
||||
* Sets an inactivity threshold for the file.
|
||||
*/
|
||||
bool SetTimeoutInterval(const FileID& file_id, double interval) const;
|
||||
bool SetTimeoutInterval(const string& file_id, double interval) const;
|
||||
|
||||
/**
|
||||
* Queue attachment of an analyzer to the file identifier. Multiple
|
||||
|
@ -105,34 +98,33 @@ public:
|
|||
* as long as the arguments differ.
|
||||
* @return false if the analyzer failed to be instantiated, else true.
|
||||
*/
|
||||
bool AddAnalyzer(const FileID& file_id, RecordVal* args) const;
|
||||
bool AddAnalyzer(const string& file_id, RecordVal* args) const;
|
||||
|
||||
/**
|
||||
* Queue removal of an analyzer for a given file identifier.
|
||||
* @return true if the analyzer is active at the time of call, else false.
|
||||
*/
|
||||
bool RemoveAnalyzer(const FileID& file_id, const RecordVal* args) const;
|
||||
bool RemoveAnalyzer(const string& file_id, const RecordVal* args) const;
|
||||
|
||||
/**
|
||||
* @return whether the file mapped to \a unique is being ignored.
|
||||
* @return whether the file mapped to \a file_id is being ignored.
|
||||
*/
|
||||
bool IsIgnored(const string& unique);
|
||||
bool IsIgnored(const string& file_id);
|
||||
|
||||
protected:
|
||||
friend class FileTimer;
|
||||
|
||||
typedef map<string, File*> StrMap;
|
||||
typedef set<string> StrSet;
|
||||
typedef map<FileID, File*> IDMap;
|
||||
typedef set<string> IDSet;
|
||||
typedef map<string, File*> IDMap;
|
||||
|
||||
/**
|
||||
* @return the File object mapped to \a unique or a null pointer if analysis
|
||||
* is being ignored for the associated file. An File object may be
|
||||
* created if a mapping doesn't exist, and if it did exist, the
|
||||
* activity time is refreshed along with any connection-related
|
||||
* fields.
|
||||
* @return the File object mapped to #current_file_id or a null pointer if
|
||||
* analysis is being ignored for the associated file. A File
|
||||
* object may be created if a mapping doesn't exist, and if it did
|
||||
* exist, the activity time is refreshed along with any
|
||||
* connection-related fields.
|
||||
*/
|
||||
File* GetFile(const string& unique, Connection* conn = 0,
|
||||
File* GetFile(Connection* conn = 0,
|
||||
AnalyzerTag::Tag tag = AnalyzerTag::Error,
|
||||
bool is_orig = false, bool update_conn = true);
|
||||
|
||||
|
@ -140,24 +132,24 @@ protected:
|
|||
* @return the File object mapped to \a file_id, or a null pointer if no
|
||||
* mapping exists.
|
||||
*/
|
||||
File* Lookup(const FileID& file_id) const;
|
||||
File* Lookup(const string& file_id) const;
|
||||
|
||||
/**
|
||||
* Evaluate timeout policy for a file and remove the File object mapped to
|
||||
* \a file_id if needed.
|
||||
*/
|
||||
void Timeout(const FileID& file_id, bool is_terminating = ::terminating);
|
||||
void Timeout(const string& file_id, bool is_terminating = ::terminating);
|
||||
|
||||
/**
|
||||
* Immediately remove file_analysis::File object associated with \a unique.
|
||||
* @return false if file string did not map to anything, else true.
|
||||
* Immediately remove file_analysis::File object associated with \a file_id.
|
||||
* @return false if file id string did not map to anything, else true.
|
||||
*/
|
||||
bool RemoveFile(const string& unique);
|
||||
bool RemoveFile(const string& file_id);
|
||||
|
||||
/**
|
||||
* Sets #current_handle to a unique file handle string based on what the
|
||||
* \c get_file_handle event derives from the connection params. The
|
||||
* event queue is flushed so that we can get the handle value immediately.
|
||||
* Sets #current_file_id to a hash of a unique file handle string based on
|
||||
* what the \c get_file_handle event derives from the connection params.
|
||||
* Event queue is flushed so that we can get the handle value immediately.
|
||||
*/
|
||||
void GetFileHandle(AnalyzerTag::Tag tag, Connection* c, bool is_orig);
|
||||
|
||||
|
@ -167,10 +159,9 @@ protected:
|
|||
static bool IsDisabled(AnalyzerTag::Tag tag);
|
||||
|
||||
private:
|
||||
StrMap str_map; /**< Map unique string to file_analysis::File. */
|
||||
IDMap id_map; /**< Map file ID to file_analysis::File records. */
|
||||
StrSet ignored; /**< Ignored files. Will be finally removed on EOF. */
|
||||
string current_handle; /**< Last file handle set by get_file_handle event.*/
|
||||
IDSet ignored; /**< Ignored files. Will be finally removed on EOF. */
|
||||
string current_file_id; /**< Hash of what get_file_handle event sets.*/
|
||||
|
||||
static TableVal* disabled; /**< Table of disabled analyzers. */
|
||||
};
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue