zeek/src/file_analysis/Manager.cc
2020-07-31 16:22:41 -04:00

543 lines
13 KiB
C++

// See the file "COPYING" in the main distribution directory for copyright.
#include "Manager.h"
#include "File.h"
#include "Analyzer.h"
#include "Event.h"
#include "UID.h"
#include "digest.h"
#include "plugin/Manager.h"
#include "analyzer/Manager.h"
#include "file_analysis/file_analysis.bif.h"
#include <openssl/md5.h>
using namespace file_analysis;
using namespace std;
// Constructs the file analysis manager. "Files" and "Tag" are handed to the
// component-manager base class; the manager itself starts out with an empty
// current file ID, no file-magic state and both file counters at zero.
Manager::Manager()
	: plugin::ComponentManager<file_analysis::Tag,
	                           file_analysis::Component>("Files", "Tag"),
	  current_file_id(),
	  magic_state(nullptr),
	  cumulative_files(0),
	  max_files(0)
	{
	}
// Frees everything the manager still owns: the values stored in the
// MIME-type map, every File left in the ID map, and the file-magic state.
Manager::~Manager()
	{
	for ( const auto& mime_entry : mime_types )
		delete mime_entry.second;

	// Have to assume that too much of Zeek has been shutdown by this point
	// to do anything more than reclaim memory.
	for ( auto it = id_map.begin(); it != id_map.end(); ++it )
		delete it->second;

	delete magic_state;
	}
// Initialization hook; currently a no-op for this manager.
// NOTE(review): presumably invoked before scripts are loaded -- confirm
// against the caller.
void Manager::InitPreScript()
{
}
// Initialization hook; currently a no-op for this manager.
// NOTE(review): presumably invoked after scripts are loaded -- confirm
// against the caller.
void Manager::InitPostScript()
{
}
// (Re)builds the file-magic matching state: any previously held state is
// freed first, then a fresh one is requested from the rule matcher.
void Manager::InitMagic()
{
delete magic_state;
magic_state = zeek::detail::rule_matcher->InitFileMagic();
}
void Manager::Terminate()
{
vector<string> keys;
keys.reserve(id_map.size());
for ( const auto& entry : id_map )
keys.push_back(entry.first);
for ( const string& key : keys )
Timeout(key, true);
mgr.Drain();
}
// Turns a file handle string into a file ID: the handle bytes are run
// through the 128-bit keyed static hash and the result is rendered as a
// base-62 UID carrying the prefix "F".
string Manager::HashHandle(const string& handle) const
	{
	hash128_t digest;
	KeyedHash::StaticHash128(handle.data(), handle.size(), &digest);

	Bro::UID uid(bits_per_uid, digest, 2);
	return uid.Base62("F");
	}
// Records the hashed form of `handle` as the current file ID. An empty
// handle is ignored and leaves the current file ID untouched.
void Manager::SetHandle(const string& handle)
	{
	if ( handle.empty() )
		return;

#ifdef DEBUG
	if ( debug_logger.IsEnabled(DBG_FILE_ANALYSIS) )
		{
		// The rendered buffer is released with delete[] once it has been
		// logged.
		zeek::String printable{handle};
		auto text = printable.Render();
		DBG_LOG(DBG_FILE_ANALYSIS, "Set current handle to %s", text);
		delete [] text;
		}
#endif

	current_file_id = HashHandle(handle);
	}
// Delivers a chunk of file data located at a given offset within the file.
//
// The file ID is `precomputed_id` if that is non-empty, otherwise it is
// obtained via GetFileID(). Returns that ID while the file remains tracked,
// or an empty string when no file could be obtained or when the file became
// complete (and was therefore removed) as a result of this call.
string Manager::DataIn(const u_char* data, uint64_t len, uint64_t offset,
                       const zeek::analyzer::Tag& tag, Connection* conn, bool is_orig,
                       const string& precomputed_id, const string& mime_type)
	{
	string id;

	if ( precomputed_id.empty() )
		id = GetFileID(tag, conn, is_orig);
	else
		id = precomputed_id;

	File* file = GetFile(id, conn, tag, is_orig);

	if ( file == nullptr )
		return "";

	// This only has any effect when
	// * called for the first time for a file
	// * being called before file->DataIn is called for the first time (before data is
	//   added to the bof buffer).
	// Afterwards SetMime just ignores what is passed to it. Thus this only has effect during
	// the first Manager::DataIn call for each file.
	if ( ! mime_type.empty() )
		file->SetMime(mime_type);

	file->DataIn(data, len, offset);

	if ( ! file->IsComplete() )
		return id;

	RemoveFile(file->GetID());
	return "";
	}
// Delivers the next sequential chunk of file data (no explicit offset).
//
// The file ID is `precomputed_id` if that is non-empty, otherwise it is
// obtained via GetFileID(). Returns that ID while the file remains tracked,
// or an empty string when no file could be obtained or when the file became
// complete (and was therefore removed) as a result of this call.
string Manager::DataIn(const u_char* data, uint64_t len, const zeek::analyzer::Tag& tag,
                       Connection* conn, bool is_orig, const string& precomputed_id,
                       const string& mime_type)
	{
	string id;

	if ( precomputed_id.empty() )
		id = GetFileID(tag, conn, is_orig);
	else
		id = precomputed_id;

	// Sequential data input shouldn't be going over multiple conns, so don't
	// do the check to update connection set.
	File* file = GetFile(id, conn, tag, is_orig, false);

	if ( file == nullptr )
		return "";

	if ( ! mime_type.empty() )
		file->SetMime(mime_type);

	file->DataIn(data, len);

	if ( ! file->IsComplete() )
		return id;

	RemoveFile(file->GetID());
	return "";
	}
// Delivers the next sequential chunk of data for a file identified directly
// by `file_id`, with `source` used as the file's source name. No connection
// is associated: the file is looked up (or created) with a null connection
// and the Error analyzer tag. The file is removed once it reports complete.
void Manager::DataIn(const u_char* data, uint64_t len, const string& file_id,
                     const string& source)
	{
	File* file = GetFile(file_id, nullptr, zeek::analyzer::Tag::Error, false, false,
	                     source.c_str());

	if ( file )
		{
		file->DataIn(data, len);

		if ( file->IsComplete() )
			RemoveFile(file->GetID());
		}
	}
// Signals end-of-file for both directions of a connection: first with
// is_orig set to true, then with it set to false.
void Manager::EndOfFile(const zeek::analyzer::Tag& tag, Connection* conn)
	{
	const bool directions[] = {true, false};

	for ( bool is_orig : directions )
		EndOfFile(tag, conn, is_orig);
	}
// Signals end-of-file for one direction of a connection by resolving the
// file ID for (tag, conn, is_orig) and removing the corresponding file.
void Manager::EndOfFile(const zeek::analyzer::Tag& tag, Connection* conn, bool is_orig)
	{
	// Don't need to create a file if we're just going to remove it right away.
	const string id = GetFileID(tag, conn, is_orig);
	RemoveFile(id);
	}
// Signals end-of-file for the file with the given ID by removing it from
// the manager. The result of RemoveFile() is not reported, so an unknown ID
// is silently ignored.
void Manager::EndOfFile(const string& file_id)
{
RemoveFile(file_id);
}
// Reports a gap of `len` bytes at `offset` to the file identified by
// `precomputed_id` (if non-empty) or by GetFileID() otherwise.
// Returns the file ID, or an empty string if no file could be obtained.
string Manager::Gap(uint64_t offset, uint64_t len, const zeek::analyzer::Tag& tag,
                    Connection* conn, bool is_orig, const string& precomputed_id)
	{
	string id;

	if ( precomputed_id.empty() )
		id = GetFileID(tag, conn, is_orig);
	else
		id = precomputed_id;

	File* file = GetFile(id, conn, tag, is_orig);

	if ( file == nullptr )
		return "";

	file->Gap(offset, len);
	return id;
	}
// Sets the total byte count of the file identified by `precomputed_id` (if
// non-empty) or by GetFileID() otherwise.
// Returns the file ID while the file remains tracked, or an empty string
// when no file could be obtained or when setting the size made the file
// complete (in which case it is removed).
string Manager::SetSize(uint64_t size, const zeek::analyzer::Tag& tag, Connection* conn,
                        bool is_orig, const string& precomputed_id)
	{
	string id;

	if ( precomputed_id.empty() )
		id = GetFileID(tag, conn, is_orig);
	else
		id = precomputed_id;

	File* file = GetFile(id, conn, tag, is_orig);

	if ( file == nullptr )
		return "";

	file->SetTotalBytes(size);

	if ( ! file->IsComplete() )
		return id;

	RemoveFile(file->GetID());
	return "";
	}
bool Manager::SetTimeoutInterval(const string& file_id, double interval) const
{
File* file = LookupFile(file_id);
if ( ! file )
return false;
if ( interval > 0 )
file->postpone_timeout = true;
file->SetTimeoutInterval(interval);
return true;
}
// Enables reassembly on a tracked file.
// Returns false if `file_id` is not known, true otherwise.
bool Manager::EnableReassembly(const string& file_id)
	{
	if ( File* file = LookupFile(file_id) )
		{
		file->EnableReassembly();
		return true;
		}

	return false;
	}
// Disables reassembly on a tracked file.
// Returns false if `file_id` is not known, true otherwise.
bool Manager::DisableReassembly(const string& file_id)
	{
	if ( File* file = LookupFile(file_id) )
		{
		file->DisableReassembly();
		return true;
		}

	return false;
	}
// Forwards `max` to SetReassemblyBuffer() on a tracked file.
// Returns false if `file_id` is not known, true otherwise.
bool Manager::SetReassemblyBuffer(const string& file_id, uint64_t max)
	{
	if ( File* file = LookupFile(file_id) )
		{
		file->SetReassemblyBuffer(max);
		return true;
		}

	return false;
	}
bool Manager::SetExtractionLimit(const string& file_id, zeek::RecordVal* args,
uint64_t n) const
{ return SetExtractionLimit(file_id, {zeek::NewRef{}, args}, n); }
// Forwards an extraction-limit change (`args`, `n`) to a tracked file.
// Returns false if `file_id` is not known; otherwise returns whatever the
// file's SetExtractionLimit() reports.
bool Manager::SetExtractionLimit(const string& file_id,
                                 zeek::RecordValPtr args, uint64_t n) const
	{
	if ( File* file = LookupFile(file_id) )
		return file->SetExtractionLimit(std::move(args), n);

	return false;
	}
// Raw-pointer convenience overload: wraps `args` in a RecordValPtr (built
// with the zeek::NewRef tag) and forwards to the RecordValPtr overload.
// NOTE(review): presumably the NewRef tag takes an additional reference
// rather than adopting the caller's -- confirm against IntrusivePtr.
bool Manager::AddAnalyzer(const string& file_id, const file_analysis::Tag& tag,
                          zeek::RecordVal* args) const
	{
	zeek::RecordValPtr wrapped{zeek::NewRef{}, args};
	return AddAnalyzer(file_id, tag, std::move(wrapped));
	}
// Asks a tracked file to attach the file analyzer `tag` with arguments
// `args`. Returns false if `file_id` is not known; otherwise returns
// whatever the file's AddAnalyzer() reports.
bool Manager::AddAnalyzer(const string& file_id, const file_analysis::Tag& tag,
                          zeek::RecordValPtr args) const
	{
	if ( File* file = LookupFile(file_id) )
		return file->AddAnalyzer(tag, std::move(args));

	return false;
	}
// Raw-pointer convenience overload: wraps `args` in a RecordValPtr (built
// with the zeek::NewRef tag) and forwards to the RecordValPtr overload.
// NOTE(review): presumably the NewRef tag takes an additional reference
// rather than adopting the caller's -- confirm against IntrusivePtr.
bool Manager::RemoveAnalyzer(const string& file_id, const file_analysis::Tag& tag,
                             zeek::RecordVal* args) const
	{
	zeek::RecordValPtr wrapped{zeek::NewRef{}, args};
	return RemoveAnalyzer(file_id, tag, std::move(wrapped));
	}
// Asks a tracked file to detach the file analyzer `tag` that was attached
// with arguments `args`. Returns false if `file_id` is not known; otherwise
// returns whatever the file's RemoveAnalyzer() reports.
bool Manager::RemoveAnalyzer(const string& file_id, const file_analysis::Tag& tag,
                             zeek::RecordValPtr args) const
	{
	if ( File* file = LookupFile(file_id) )
		return file->RemoveAnalyzer(tag, std::move(args));

	return false;
	}
// Returns the File tracked under `file_id`, creating and registering it if
// it does not exist yet.
//
// file_id:     ID of the file; an empty or ignored ID yields nullptr.
// conn:        connection handed to the File constructor and to the
//              connection-related calls below.
// tag:         analyzer tag; its component name serves as the file's source
//              unless `source_name` is given.
// is_orig:     direction flag handed through to the File.
// update_conn: for an already-tracked file, whether to refresh its
//              connection fields (raising file_over_new_connection if the
//              refresh reports true).
// source_name: if non-null, used as the new file's source.
//
// Returns nullptr if the ID is empty or ignored, including the case where a
// newly created file is ignored by the time its creation events have been
// raised.
File* Manager::GetFile(const string& file_id, Connection* conn,
                       const zeek::analyzer::Tag& tag, bool is_orig, bool update_conn,
                       const char* source_name)
	{
	if ( file_id.empty() || IsIgnored(file_id) )
		return nullptr;

	if ( File* existing = LookupFile(file_id) )
		{
		existing->UpdateLastActivityTime();

		if ( update_conn && existing->UpdateConnectionFields(conn, is_orig) )
			existing->RaiseFileOverNewConnection(conn, is_orig);

		return existing;
		}

	File* created = new File(file_id,
	                         source_name ? source_name
	                                     : zeek::analyzer_mgr->GetComponentName(tag),
	                         conn, tag, is_orig);
	id_map[file_id] = created;

	// Statistics: total files ever created and the peak number tracked at once.
	++cumulative_files;

	if ( id_map.size() > max_files )
		max_files = id_map.size();

	created->ScheduleInactivityTimer();

	// Generate file_new after inserting it into manager's mapping
	// in case script-layer calls back in to core from the event.
	created->FileEvent(file_new);
	// Same for file_over_new_connection.
	created->RaiseFileOverNewConnection(conn, is_orig);

	// The ID may have been added to the ignore set while the calls above
	// ran; in that case the file stays registered but is not handed out.
	if ( IsIgnored(file_id) )
		return nullptr;

	return created;
	}
// Returns the File tracked under `file_id`, or nullptr if there is none.
// Never creates a file.
File* Manager::LookupFile(const string& file_id) const
	{
	auto it = id_map.find(file_id);
	return it != id_map.end() ? it->second : nullptr;
	}
// Handles an inactivity timeout for `file_id`. Unknown IDs are ignored.
//
// The file_timeout event is raised with the postpone flag cleared. If the
// flag is set again afterwards and we are not terminating, the file is kept
// and its inactivity timer is rescheduled; otherwise the file is removed.
void Manager::Timeout(const string& file_id, bool is_terminating)
	{
	File* file = LookupFile(file_id);

	if ( file == nullptr )
		return;

	file->postpone_timeout = false;
	file->FileEvent(file_timeout);

	// Must be evaluated only after the event above has been raised.
	const bool postponed = file->postpone_timeout && ! is_terminating;

	if ( ! postponed )
		{
		DBG_LOG(DBG_FILE_ANALYSIS, "File analysis timeout for %s",
		        file->GetID().c_str());
		RemoveFile(file->GetID());
		return;
		}

	DBG_LOG(DBG_FILE_ANALYSIS, "Postpone file analysis timeout for %s",
	        file->GetID().c_str());
	file->UpdateLastActivityTime();
	file->ScheduleInactivityTimer();
	}
// Adds `file_id` to the set of ignored files, provided the file is
// currently tracked. Returns whether the file was known (and thus ignored).
bool Manager::IgnoreFile(const string& file_id)
	{
	const bool known = LookupFile(file_id) != nullptr;

	if ( known )
		{
		DBG_LOG(DBG_FILE_ANALYSIS, "Ignore FileID %s", file_id.c_str());
		ignored.insert(file_id);
		}

	return known;
	}
bool Manager::RemoveFile(const string& file_id)
{
// Can't remove from the dictionary/map right away as invoking EndOfFile
// may cause some events to be executed which actually depend on the file
// still being in the dictionary/map.
File* f = LookupFile(file_id);
if ( ! f )
return false;
DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Remove file", file_id.c_str());
f->EndOfFile();
id_map.erase(file_id);
ignored.erase(file_id);
delete f;
return true;
}
// Returns true if `file_id` is currently in the set of ignored files.
bool Manager::IsIgnored(const string& file_id)
	{
	return ignored.count(file_id) > 0;
	}
// Determines the file ID for (tag, c, is_orig) by raising the
// get_file_handle event and draining the event queue right away. The
// current file ID is cleared first, and whatever it holds after the drain
// is returned.
//
// Returns an empty string if the analyzer tag is disabled or if
// get_file_handle evaluates to false.
string Manager::GetFileID(const zeek::analyzer::Tag& tag, Connection* c, bool is_orig)
	{
	current_file_id.clear();

	if ( IsDisabled(tag) || ! get_file_handle )
		return "";

	DBG_LOG(DBG_FILE_ANALYSIS, "Raise get_file_handle() for protocol analyzer %s",
	        zeek::analyzer_mgr->GetComponentName(tag).c_str());

	const auto& tag_val = tag.AsVal();

	mgr.Enqueue(get_file_handle, tag_val, c->ConnVal(), zeek::val_mgr->Bool(is_orig));
	mgr.Drain(); // need file handle immediately so we don't have to buffer data

	return current_file_id;
	}
// Consults the script-level table "Files::disable" to decide whether file
// analysis is disabled for `tag`. The table is looked up once and cached in
// `disabled`. Returns false if the table lookup yields no value.
bool Manager::IsDisabled(const zeek::analyzer::Tag& tag)
	{
	if ( ! disabled )
		disabled = zeek::id::find_const("Files::disable")->AsTableVal();

	// NOTE(review): bool(tag) collapses the tag to 0 or 1 before it is used
	// as the table index -- confirm this is intended rather than a per-tag
	// lookup.
	auto index = zeek::val_mgr->Count(bool(tag));
	auto yield = disabled->FindOrDefault(index);

	return yield ? yield->AsBool() : false;
	}
// Raw-pointer convenience overload: wraps `args` in a RecordValPtr (built
// with the zeek::NewRef tag) and forwards to the RecordValPtr overload.
// NOTE(review): presumably the NewRef tag takes an additional reference
// rather than adopting the caller's -- confirm against IntrusivePtr.
Analyzer* Manager::InstantiateAnalyzer(const Tag& tag, zeek::RecordVal* args, File* f) const
	{
	zeek::RecordValPtr wrapped{zeek::NewRef{}, args};
	return InstantiateAnalyzer(tag, std::move(wrapped), f);
	}
// Creates a file analyzer of type `tag` for file `f`, handing it `args`.
//
// The component's `factory_func` is preferred; `factory` (which takes a raw
// RecordVal pointer) is the fallback. Returns nullptr, after an internal
// warning, if the tag is unknown or the component has neither factory. A
// factory that returns null is reported via InternalError.
// NOTE(review): InternalError presumably does not return -- the analyzer
// pointer is dereferenced unconditionally afterwards.
Analyzer* Manager::InstantiateAnalyzer(const Tag& tag,
                                       zeek::RecordValPtr args,
                                       File* f) const
	{
	Component* component = Lookup(tag);

	if ( component == nullptr )
		{
		zeek::reporter->InternalWarning(
			"unknown file analyzer instantiation request: %s",
			tag.AsString().c_str());
		return nullptr;
		}

	DBG_LOG(DBG_FILE_ANALYSIS, "[%s] Instantiate analyzer %s",
	        f->id.c_str(), GetComponentName(tag).c_str());

	if ( ! component->factory_func && ! component->factory )
		{
		zeek::reporter->InternalWarning("file analyzer %s cannot be instantiated "
		                                "dynamically", component->CanonicalName().c_str());
		return nullptr;
		}

	Analyzer* analyzer = component->factory_func
	                         ? component->factory_func(std::move(args), f)
	                         : component->factory(args.get(), f);

	if ( analyzer == nullptr )
		zeek::reporter->InternalError("file analyzer instantiation failed");

	analyzer->SetAnalyzerTag(tag);

	return analyzer;
	}
// Runs the file-magic signatures over `len` bytes at `data`.
//
// `rval` is handed to the rule matcher's Match() and replaced by its return
// value, which is what this function returns. The magic state is cleared
// after matching. Raises an internal error if the magic state has not been
// initialized.
zeek::detail::RuleMatcher::MIME_Matches* Manager::DetectMIME(
	const u_char* data, uint64_t len,
	zeek::detail::RuleMatcher::MIME_Matches* rval) const
	{
	if ( magic_state == nullptr )
		zeek::reporter->InternalError("file magic signature state not initialized");

	auto& matcher = zeek::detail::rule_matcher;

	rval = matcher->Match(magic_state, data, len, rval);
	matcher->ClearFileMagicState(magic_state);

	return rval;
	}
// Runs the file-magic signatures over `len` bytes at `data` and returns a
// single MIME type: the first string of the first entry in the match
// container's iteration order. Returns an empty string if nothing matched.
string Manager::DetectMIME(const u_char* data, uint64_t len) const
	{
	zeek::detail::RuleMatcher::MIME_Matches matches;
	DetectMIME(data, len, &matches);

	if ( matches.empty() )
		return "";

	const auto& first_entry = *matches.begin();
	return *first_entry.second.begin();
	}
// Converts a set of MIME matches into a script-level "mime_matches" vector
// holding one "mime_match" record per entry of `m`. Field 0 of each record
// receives the entry's integer key and field 1 a MIME string.
zeek::VectorValPtr file_analysis::GenMIMEMatchesVal(const zeek::detail::RuleMatcher::MIME_Matches& m)
	{
	static auto mime_matches = zeek::id::find_type<zeek::VectorType>("mime_matches");
	static auto mime_match = zeek::id::find_type<zeek::RecordType>("mime_match");

	auto rval = zeek::make_intrusive<zeek::VectorVal>(mime_matches);

	for ( const auto& match : m )
		{
		auto element = zeek::make_intrusive<zeek::RecordVal>(mime_match);

		// NOTE(review): every pass overwrites fields 0 and 1 of the same
		// record, so only the last string of the set is kept (and an empty
		// set leaves both fields unassigned) -- confirm this is intended.
		for ( const auto& mime : match.second )
			{
			element->Assign(0, zeek::val_mgr->Int(match.first));
			element->Assign(1, zeek::make_intrusive<zeek::StringVal>(mime));
			}

		rval->Assign(rval->Size(), std::move(element));
		}

	return rval;
	}