Merge remote-tracking branch 'origin/topic/jsiwek/faf-cleanup'

Closes #1002.

* origin/topic/jsiwek/faf-cleanup:
  Move file analyzers to new plugin infrastructure.
  Add a general file analysis overview/how-to document.
  Improve file analysis doxygen comments.
  Improve tracking of HTTP file extraction (addresses #988).
  Fix HTTP multipart body file analysis.
  Remove logging of analyzers field of FileAnalysis::Info.
  Remove extraction counter in default file extraction scripts.
  Remove FileAnalysis::postpone_timeout.
  Make default get_file_handle handlers &priority=5.
  Add input interface to forward data for file analysis.
  File analysis framework interface simplifications.
This commit is contained in:
Robin Sommer 2013-07-03 16:22:43 -07:00
commit d8b05af7e5
127 changed files with 2458 additions and 1412 deletions

View file

@ -2,6 +2,7 @@
#include <vector>
#include <string>
#include <openssl/md5.h>
#include "Manager.h"
#include "File.h"
@ -9,12 +10,18 @@
#include "Var.h"
#include "Event.h"
#include "plugin/Manager.h"
using namespace file_analysis;
TableVal* Manager::disabled = 0;
string Manager::salt;
Manager::Manager()
{
tag_enum_type = new EnumType("FileAnalysis::Tag");
::ID* id = install_ID("Tag", "FileAnalysis", true, true);
add_type(id, tag_enum_type, 0, 0);
}
Manager::~Manager()
@ -22,9 +29,44 @@ Manager::~Manager()
Terminate();
}
void Manager::InitPreScript()
{
std::list<Component*> analyzers = plugin_mgr->Components<Component>();
for ( std::list<Component*>::const_iterator i = analyzers.begin();
i != analyzers.end(); ++i )
RegisterAnalyzerComponent(*i);
}
void Manager::RegisterAnalyzerComponent(Component* component)
{
const char* cname = component->CanonicalName();
if ( tag_enum_type->Lookup("FileAnalysis", cname) != -1 )
reporter->FatalError("File Analyzer %s defined more than once", cname);
DBG_LOG(DBG_FILE_ANALYSIS, "Registering analyzer %s (tag %s)",
component->Name(), component->Tag().AsString().c_str());
analyzers_by_name.insert(std::make_pair(cname, component));
analyzers_by_tag.insert(std::make_pair(component->Tag(), component));
analyzers_by_val.insert(std::make_pair(
component->Tag().AsEnumVal()->InternalInt(), component));
string id = fmt("ANALYZER_%s", cname);
tag_enum_type->AddName("FileAnalysis", id.c_str(),
component->Tag().AsEnumVal()->InternalInt(), true);
}
void Manager::InitPostScript()
{
#include "file_analysis.bif.init.cc"
}
void Manager::Terminate()
{
vector<FileID> keys;
vector<string> keys;
for ( IDMap::iterator it = id_map.begin(); it != id_map.end(); ++it )
keys.push_back(it->first);
@ -32,66 +74,77 @@ void Manager::Terminate()
Timeout(keys[i], true);
}
string Manager::HashHandle(const string& handle) const
{
if ( salt.empty() )
salt = BifConst::FileAnalysis::salt->CheckString();
char tmp[20];
uint64 hash[2];
string msg(handle + salt);
MD5(reinterpret_cast<const u_char*>(msg.data()), msg.size(),
reinterpret_cast<u_char*>(hash));
uitoa_n(hash[0], tmp, sizeof(tmp), 62);
return tmp;
}
void Manager::SetHandle(const string& handle)
{
current_handle = handle;
if ( handle.empty() )
return;
current_file_id = HashHandle(handle);
}
void Manager::DataIn(const u_char* data, uint64 len, uint64 offset,
analyzer::Tag tag, Connection* conn, bool is_orig)
{
if ( IsDisabled(tag) )
return;
GetFileHandle(tag, conn, is_orig);
DataIn(data, len, offset, GetFile(current_handle, conn, tag, is_orig));
}
File* file = GetFile(current_file_id, conn, tag, is_orig);
void Manager::DataIn(const u_char* data, uint64 len, uint64 offset,
const string& unique)
{
DataIn(data, len, offset, GetFile(unique));
}
void Manager::DataIn(const u_char* data, uint64 len, uint64 offset,
File* file)
{
if ( ! file )
return;
file->DataIn(data, len, offset);
if ( file->IsComplete() )
RemoveFile(file->GetUnique());
RemoveFile(file->GetID());
}
void Manager::DataIn(const u_char* data, uint64 len, analyzer::Tag tag,
Connection* conn, bool is_orig)
{
if ( IsDisabled(tag) )
return;
GetFileHandle(tag, conn, is_orig);
// Sequential data input shouldn't be going over multiple conns, so don't
// do the check to update connection set.
DataIn(data, len, GetFile(current_handle, conn, tag, is_orig, false));
}
File* file = GetFile(current_file_id, conn, tag, is_orig, false);
void Manager::DataIn(const u_char* data, uint64 len, const string& unique)
{
DataIn(data, len, GetFile(unique));
}
void Manager::DataIn(const u_char* data, uint64 len, File* file)
{
if ( ! file )
return;
file->DataIn(data, len);
if ( file->IsComplete() )
RemoveFile(file->GetUnique());
RemoveFile(file->GetID());
}
void Manager::DataIn(const u_char* data, uint64 len, const string& file_id,
const string& source)
{
File* file = GetFile(file_id);
if ( ! file )
return;
if ( file->GetSource().empty() )
file->SetSource(source);
file->DataIn(data, len);
if ( file->IsComplete() )
RemoveFile(file->GetID());
}
void Manager::EndOfFile(analyzer::Tag tag, Connection* conn)
@ -102,35 +155,22 @@ void Manager::EndOfFile(analyzer::Tag tag, Connection* conn)
void Manager::EndOfFile(analyzer::Tag tag, Connection* conn, bool is_orig)
{
if ( IsDisabled(tag) )
return;
// Don't need to create a file if we're just going to remove it right away.
GetFileHandle(tag, conn, is_orig);
EndOfFile(current_handle);
RemoveFile(current_file_id);
}
void Manager::EndOfFile(const string& unique)
void Manager::EndOfFile(const string& file_id)
{
RemoveFile(unique);
RemoveFile(file_id);
}
void Manager::Gap(uint64 offset, uint64 len, analyzer::Tag tag,
Connection* conn, bool is_orig)
{
if ( IsDisabled(tag) )
return;
GetFileHandle(tag, conn, is_orig);
Gap(offset, len, GetFile(current_handle, conn, tag, is_orig));
}
File* file = GetFile(current_file_id, conn, tag, is_orig);
void Manager::Gap(uint64 offset, uint64 len, const string& unique)
{
Gap(offset, len, GetFile(unique));
}
void Manager::Gap(uint64 offset, uint64 len, File* file)
{
if ( ! file )
return;
@ -140,52 +180,33 @@ void Manager::Gap(uint64 offset, uint64 len, File* file)
void Manager::SetSize(uint64 size, analyzer::Tag tag, Connection* conn,
bool is_orig)
{
if ( IsDisabled(tag) )
return;
GetFileHandle(tag, conn, is_orig);
SetSize(size, GetFile(current_handle, conn, tag, is_orig));
}
File* file = GetFile(current_file_id, conn, tag, is_orig);
void Manager::SetSize(uint64 size, const string& unique)
{
SetSize(size, GetFile(unique));
}
void Manager::SetSize(uint64 size, File* file)
{
if ( ! file )
return;
file->SetTotalBytes(size);
if ( file->IsComplete() )
RemoveFile(file->GetUnique());
RemoveFile(file->GetID());
}
bool Manager::PostponeTimeout(const FileID& file_id) const
bool Manager::SetTimeoutInterval(const string& file_id, double interval) const
{
File* file = Lookup(file_id);
if ( ! file )
return false;
file->postpone_timeout = true;
return true;
}
bool Manager::SetTimeoutInterval(const FileID& file_id, double interval) const
{
File* file = Lookup(file_id);
if ( ! file )
return false;
if ( interval > 0 )
file->postpone_timeout = true;
file->SetTimeoutInterval(interval);
return true;
}
bool Manager::AddAnalyzer(const FileID& file_id, RecordVal* args) const
bool Manager::AddAnalyzer(const string& file_id, RecordVal* args) const
{
File* file = Lookup(file_id);
@ -195,7 +216,7 @@ bool Manager::AddAnalyzer(const FileID& file_id, RecordVal* args) const
return file->AddAnalyzer(args);
}
bool Manager::RemoveAnalyzer(const FileID& file_id, const RecordVal* args) const
bool Manager::RemoveAnalyzer(const string& file_id, const RecordVal* args) const
{
File* file = Lookup(file_id);
@ -205,32 +226,23 @@ bool Manager::RemoveAnalyzer(const FileID& file_id, const RecordVal* args) const
return file->RemoveAnalyzer(args);
}
File* Manager::GetFile(const string& unique, Connection* conn,
File* Manager::GetFile(const string& file_id, Connection* conn,
analyzer::Tag tag, bool is_orig, bool update_conn)
{
if ( unique.empty() )
if ( file_id.empty() )
return 0;
if ( IsIgnored(unique) )
if ( IsIgnored(file_id) )
return 0;
File* rval = str_map[unique];
File* rval = id_map[file_id];
if ( ! rval )
{
rval = str_map[unique] = new File(unique, conn, tag, is_orig);
FileID id = rval->GetID();
if ( id_map[id] )
{
reporter->Error("Evicted duplicate file ID: %s", id.c_str());
RemoveFile(unique);
}
id_map[id] = rval;
rval = id_map[file_id] = new File(file_id, conn, tag, is_orig);
rval->ScheduleInactivityTimer();
if ( IsIgnored(unique) )
if ( IsIgnored(file_id) )
return 0;
}
else
@ -244,7 +256,7 @@ File* Manager::GetFile(const string& unique, Connection* conn,
return rval;
}
File* Manager::Lookup(const FileID& file_id) const
File* Manager::Lookup(const string& file_id) const
{
IDMap::const_iterator it = id_map.find(file_id);
@ -254,7 +266,7 @@ File* Manager::Lookup(const FileID& file_id) const
return it->second;
}
void Manager::Timeout(const FileID& file_id, bool is_terminating)
void Manager::Timeout(const string& file_id, bool is_terminating)
{
File* file = Lookup(file_id);
@ -277,53 +289,50 @@ void Manager::Timeout(const FileID& file_id, bool is_terminating)
DBG_LOG(DBG_FILE_ANALYSIS, "File analysis timeout for %s",
file->GetID().c_str());
RemoveFile(file->GetUnique());
RemoveFile(file->GetID());
}
bool Manager::IgnoreFile(const FileID& file_id)
bool Manager::IgnoreFile(const string& file_id)
{
if ( id_map.find(file_id) == id_map.end() )
return false;
DBG_LOG(DBG_FILE_ANALYSIS, "Ignore FileID %s", file_id.c_str());
ignored.insert(file_id);
return true;
}
bool Manager::RemoveFile(const string& file_id)
{
IDMap::iterator it = id_map.find(file_id);
if ( it == id_map.end() )
return false;
DBG_LOG(DBG_FILE_ANALYSIS, "Ignore FileID %s", file_id.c_str());
ignored.insert(it->second->GetUnique());
return true;
}
bool Manager::RemoveFile(const string& unique)
{
StrMap::iterator it = str_map.find(unique);
if ( it == str_map.end() )
return false;
DBG_LOG(DBG_FILE_ANALYSIS, "Remove FileID %s", file_id.c_str());
it->second->EndOfFile();
FileID id = it->second->GetID();
DBG_LOG(DBG_FILE_ANALYSIS, "Remove FileID %s", id.c_str());
if ( ! id_map.erase(id) )
reporter->Error("No mapping for fileID %s", id.c_str());
ignored.erase(unique);
delete it->second;
str_map.erase(unique);
id_map.erase(file_id);
ignored.erase(file_id);
return true;
}
bool Manager::IsIgnored(const string& unique)
bool Manager::IsIgnored(const string& file_id)
{
return ignored.find(unique) != ignored.end();
return ignored.find(file_id) != ignored.end();
}
void Manager::GetFileHandle(analyzer::Tag tag, Connection* c, bool is_orig)
{
current_handle.clear();
current_file_id.clear();
if ( IsDisabled(tag) )
return;
if ( ! get_file_handle )
return;
@ -357,3 +366,31 @@ bool Manager::IsDisabled(analyzer::Tag tag)
return rval;
}
Analyzer* Manager::InstantiateAnalyzer(int tag, RecordVal* args, File* f) const
{
analyzer_map_by_val::const_iterator it = analyzers_by_val.find(tag);
if ( it == analyzers_by_val.end() )
reporter->InternalError("cannot instantiate unknown file analyzer: %d",
tag);
Component* c = it->second;
if ( ! c->Factory() )
reporter->InternalError("file analyzer %s cannot be instantiated "
"dynamically", c->CanonicalName());
return c->Factory()(args, f);
}
const char* Manager::GetAnalyzerName(int tag) const
{
analyzer_map_by_val::const_iterator it = analyzers_by_val.find(tag);
if ( it == analyzers_by_val.end() )
reporter->InternalError("cannot get name of unknown file analyzer: %d",
tag);
return it->second->CanonicalName();
}