mirror of
https://github.com/zeek/zeek.git
synced 2025-10-04 23:58:20 +00:00
Merge remote-tracking branch 'origin/topic/jsiwek/file-analysis' into topic/seth/file-analysis-exe-analyzer
Conflicts: src/file_analysis/ActionSet.cc
This commit is contained in:
commit
a624dd61c0
27 changed files with 80205 additions and 159553 deletions
|
@ -11,13 +11,11 @@ using namespace file_analysis;
|
|||
|
||||
// keep in order w/ declared enum values in file_analysis.bif
|
||||
static ActionInstantiator action_factory[] = {
|
||||
Extract::Instantiate,
|
||||
MD5::Instantiate,
|
||||
SHA1::Instantiate,
|
||||
SHA256::Instantiate,
|
||||
DataEvent::Instantiate,
|
||||
|
||||
PE_Analyzer::Instantiate,
|
||||
file_analysis::Extract::Instantiate,
|
||||
file_analysis::MD5::Instantiate,
|
||||
file_analysis::SHA1::Instantiate,
|
||||
file_analysis::SHA256::Instantiate,
|
||||
file_analysis::DataEvent::Instantiate,
|
||||
};
|
||||
|
||||
static void action_del_func(void* v)
|
||||
|
|
|
@ -13,6 +13,11 @@ namespace file_analysis {
|
|||
class Info;
|
||||
declare(PDict,Action);
|
||||
|
||||
/**
|
||||
* A set of file analysis actions indexed by ActionArgs. Allows queueing
|
||||
* of addition/removals so that those modifications can happen at well-defined
|
||||
* times (e.g. to make sure a loop iterator isn't invalidated).
|
||||
*/
|
||||
class ActionSet {
|
||||
public:
|
||||
|
||||
|
|
|
@ -81,7 +81,8 @@ void Info::StaticInit()
|
|||
|
||||
Info::Info(const string& unique, Connection* conn, AnalyzerTag::Tag tag)
|
||||
: file_id(""), unique(unique), val(0), postpone_timeout(false),
|
||||
need_reassembly(false), done(false), actions(this)
|
||||
first_chunk(true), need_type(false), need_reassembly(false), done(false),
|
||||
actions(this)
|
||||
{
|
||||
StaticInit();
|
||||
|
||||
|
@ -134,11 +135,23 @@ void Info::UpdateConnectionFields(Connection* conn)
|
|||
|
||||
Val* conns = val->Lookup(conns_idx);
|
||||
|
||||
bool is_first = false;
|
||||
|
||||
if ( ! conns )
|
||||
{
|
||||
is_first = true;
|
||||
val->Assign(conns_idx, conns = empty_connection_table());
|
||||
}
|
||||
|
||||
Val* idx = get_conn_id_val(conn);
|
||||
conns->AsTableVal()->Assign(idx, conn->BuildConnVal());
|
||||
if ( ! conns->AsTableVal()->Lookup(idx) )
|
||||
{
|
||||
conns->AsTableVal()->Assign(idx, conn->BuildConnVal());
|
||||
if ( ! is_first )
|
||||
file_mgr->EvaluatePolicy(BifEnum::FileAnalysis::TRIGGER_NEW_CONN,
|
||||
this);
|
||||
}
|
||||
|
||||
Unref(idx);
|
||||
}
|
||||
|
||||
|
@ -162,7 +175,7 @@ int Info::Idx(const string& field)
|
|||
{
|
||||
int rval = BifType::Record::FileAnalysis::Info->FieldOffset(field.c_str());
|
||||
if ( rval < 0 )
|
||||
reporter->InternalError("Unkown FileAnalysis::Info field: %s",
|
||||
reporter->InternalError("Unknown FileAnalysis::Info field: %s",
|
||||
field.c_str());
|
||||
return rval;
|
||||
}
|
||||
|
@ -172,6 +185,13 @@ double Info::GetTimeoutInterval() const
|
|||
return LookupFieldDefaultInterval(timeout_interval_idx);
|
||||
}
|
||||
|
||||
string Info::GetSource() const
|
||||
{
|
||||
Val* v = val->Lookup(source_idx);
|
||||
if ( ! v ) return "";
|
||||
return v->AsStringVal()->CheckString();
|
||||
}
|
||||
|
||||
RecordVal* Info::GetResults(RecordVal* args) const
|
||||
{
|
||||
TableVal* actions_table = val->Lookup(actions_idx)->AsTableVal();
|
||||
|
@ -230,18 +250,6 @@ bool Info::BufferBOF(const u_char* data, uint64 len)
|
|||
|
||||
uint64 desired_size = LookupFieldDefaultCount(bof_buffer_size_idx);
|
||||
|
||||
/* Leaving out this optimization (I think) for now to keep things simpler.
|
||||
// If first chunk satisfies desired size, do everything now without copying.
|
||||
if ( bof_buffer.chunks.empty() && len >= desired_size )
|
||||
{
|
||||
bof_buffer.full = bof_buffer.replayed = true;
|
||||
val->Assign(bof_buffer_idx, new StringVal(new BroString(data, len, 0)));
|
||||
file_mgr->EvaluatePolicy(TRIGGER_BOF_BUFFER, this);
|
||||
// TODO: libmagic stuff
|
||||
return false;
|
||||
}
|
||||
*/
|
||||
|
||||
bof_buffer.chunks.push_back(new BroString(data, len, 0));
|
||||
bof_buffer.size += len;
|
||||
|
||||
|
@ -254,18 +262,10 @@ bool Info::BufferBOF(const u_char* data, uint64 len)
|
|||
return true;
|
||||
}
|
||||
|
||||
void Info::ReplayBOF()
|
||||
bool Info::DetectTypes(const u_char* data, uint64 len)
|
||||
{
|
||||
if ( bof_buffer.replayed ) return;
|
||||
bof_buffer.replayed = true;
|
||||
|
||||
if ( bof_buffer.chunks.empty() ) return;
|
||||
|
||||
BroString* bs = concatenate(bof_buffer.chunks);
|
||||
const char* desc = bro_magic_buffer(magic, bs->Bytes(), bs->Len());
|
||||
const char* mime = bro_magic_buffer(magic_mime, bs->Bytes(), bs->Len());
|
||||
|
||||
val->Assign(bof_buffer_idx, new StringVal(bs));
|
||||
const char* desc = bro_magic_buffer(magic, data, len);
|
||||
const char* mime = bro_magic_buffer(magic_mime, data, len);
|
||||
|
||||
if ( desc )
|
||||
val->Assign(file_type_idx, new StringVal(desc));
|
||||
|
@ -273,10 +273,29 @@ void Info::ReplayBOF()
|
|||
if ( mime )
|
||||
val->Assign(mime_type_idx, new StringVal(mime));
|
||||
|
||||
return desc || mime;
|
||||
}
|
||||
|
||||
void Info::ReplayBOF()
|
||||
{
|
||||
if ( bof_buffer.replayed ) return;
|
||||
bof_buffer.replayed = true;
|
||||
|
||||
if ( bof_buffer.chunks.empty() )
|
||||
{
|
||||
// Since we missed the beginning, try file type detect on next data in.
|
||||
need_type = true;
|
||||
return;
|
||||
}
|
||||
|
||||
BroString* bs = concatenate(bof_buffer.chunks);
|
||||
val->Assign(bof_buffer_idx, new StringVal(bs));
|
||||
bool have_type = DetectTypes(bs->Bytes(), bs->Len());
|
||||
|
||||
using BifEnum::FileAnalysis::TRIGGER_BOF_BUFFER;
|
||||
file_mgr->EvaluatePolicy(TRIGGER_BOF_BUFFER, this);
|
||||
|
||||
if ( desc || mime )
|
||||
if ( have_type )
|
||||
file_mgr->EvaluatePolicy(BifEnum::FileAnalysis::TRIGGER_TYPE, this);
|
||||
|
||||
for ( size_t i = 0; i < bof_buffer.chunks.size(); ++i )
|
||||
|
@ -286,7 +305,17 @@ void Info::ReplayBOF()
|
|||
void Info::DataIn(const u_char* data, uint64 len, uint64 offset)
|
||||
{
|
||||
actions.DrainModifications();
|
||||
// TODO: attempt libmagic stuff here before doing reassembly?
|
||||
|
||||
if ( first_chunk )
|
||||
{
|
||||
if ( DetectTypes(data, len) )
|
||||
{
|
||||
file_mgr->EvaluatePolicy(BifEnum::FileAnalysis::TRIGGER_TYPE, this);
|
||||
actions.DrainModifications();
|
||||
}
|
||||
|
||||
first_chunk = false;
|
||||
}
|
||||
|
||||
Action* act = 0;
|
||||
IterCookie* c = actions.InitForIteration();
|
||||
|
@ -316,6 +345,17 @@ void Info::DataIn(const u_char* data, uint64 len)
|
|||
|
||||
if ( BufferBOF(data, len) ) return;
|
||||
|
||||
if ( need_type )
|
||||
{
|
||||
if ( DetectTypes(data, len) )
|
||||
{
|
||||
file_mgr->EvaluatePolicy(BifEnum::FileAnalysis::TRIGGER_TYPE, this);
|
||||
actions.DrainModifications();
|
||||
}
|
||||
|
||||
need_type = false;
|
||||
}
|
||||
|
||||
Action* act = 0;
|
||||
IterCookie* c = actions.InitForIteration();
|
||||
|
||||
|
|
|
@ -32,6 +32,11 @@ public:
|
|||
*/
|
||||
double GetTimeoutInterval() const;
|
||||
|
||||
/**
|
||||
* @return value of the "source" field from #val record.
|
||||
*/
|
||||
string GetSource() const;
|
||||
|
||||
/**
|
||||
* @return value of the "file_id" field from #val record.
|
||||
*/
|
||||
|
@ -155,10 +160,19 @@ protected:
|
|||
*/
|
||||
void ReplayBOF();
|
||||
|
||||
/**
|
||||
* Does file/mime type detection and assigns types (if available) to
|
||||
* corresponding fields in #val.
|
||||
* @return whether a file or mime type was available.
|
||||
*/
|
||||
bool DetectTypes(const u_char* data, uint64 len);
|
||||
|
||||
FileID file_id; /**< A pretty hash that likely identifies file*/
|
||||
string unique; /**< A string that uniquely identifies file */
|
||||
RecordVal* val; /**< \c FileAnalysis::Info from script layer. */
|
||||
bool postpone_timeout; /**< Whether postponing timeout is requested. */
|
||||
bool first_chunk; /**< Track first non-linear chunk. */
|
||||
bool need_type; /**< Flags next data input to be magic typed. */
|
||||
bool need_reassembly; /**< Whether file stream reassembly is needed. */
|
||||
bool done; /**< If this object is about to be deleted. */
|
||||
ActionSet actions;
|
||||
|
|
|
@ -34,11 +34,17 @@ void Manager::ReceiveHandle(const string& handle)
|
|||
if ( pending.empty() )
|
||||
reporter->InternalError("File analysis underflow");
|
||||
|
||||
PendingFile* pf = pending.front();
|
||||
if ( ! handle.empty() )
|
||||
pf->Finish(handle);
|
||||
delete pf;
|
||||
pending.pop();
|
||||
int use_count = cache.front();
|
||||
cache.pop();
|
||||
|
||||
for ( int i = 0; i < use_count; ++i )
|
||||
{
|
||||
PendingFile* pf = pending.front();
|
||||
if ( ! handle.empty() )
|
||||
pf->Finish(handle);
|
||||
delete pf;
|
||||
pending.pop();
|
||||
}
|
||||
}
|
||||
|
||||
void Manager::EventDrainDone()
|
||||
|
@ -330,11 +336,50 @@ bool Manager::IsDisabled(AnalyzerTag::Tag tag)
|
|||
return rval;
|
||||
}
|
||||
|
||||
static bool CheckArgEquality(AnalyzerTag::Tag tag, Connection* conn,
|
||||
bool is_orig, val_list* other_args)
|
||||
{
|
||||
if ( ! other_args ) return false;
|
||||
if ( (*other_args)[0]->AsCount() != (bro_uint_t) tag ) return false;
|
||||
if ( (*other_args)[2]->AsBool() != is_orig ) return false;
|
||||
|
||||
RecordVal* id = (*other_args)[1]->AsRecordVal()->Lookup(
|
||||
connection_type->FieldOffset("id"))->AsRecordVal();
|
||||
|
||||
PortVal* orig_p = id->Lookup(
|
||||
conn_id->FieldOffset("orig_p"))->AsPortVal();
|
||||
|
||||
if ( orig_p->Port() != ntohs(conn->OrigPort()) ) return false;
|
||||
if ( orig_p->PortType() != conn->ConnTransport() ) return false;
|
||||
|
||||
PortVal* resp_p = id->Lookup(
|
||||
conn_id->FieldOffset("resp_p"))->AsPortVal();
|
||||
|
||||
if ( resp_p->Port() != ntohs(conn->RespPort()) ) return false;
|
||||
|
||||
if ( id->Lookup(conn_id->FieldOffset("orig_h"))->AsAddr() !=
|
||||
conn->OrigAddr() ) return false;
|
||||
|
||||
if ( id->Lookup(conn_id->FieldOffset("resp_h"))->AsAddr() !=
|
||||
conn->RespAddr() ) return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Manager::QueueHandleEvent(AnalyzerTag::Tag tag, Connection* conn,
|
||||
bool is_orig)
|
||||
{
|
||||
if ( ! get_file_handle ) return false;
|
||||
|
||||
if ( mgr.Tail() == get_file_handle &&
|
||||
CheckArgEquality(tag, conn, is_orig, mgr.TailArgs()) )
|
||||
{
|
||||
cache.front()++;
|
||||
return true;
|
||||
}
|
||||
|
||||
cache.push(1);
|
||||
|
||||
val_list* vl = new val_list();
|
||||
vl->append(new Val(tag, TYPE_COUNT));
|
||||
vl->append(conn->BuildConnVal());
|
||||
|
|
|
@ -130,6 +130,7 @@ protected:
|
|||
typedef set<string> StrSet;
|
||||
typedef map<FileID, Info*> IDMap;
|
||||
typedef queue<PendingFile*> PendingQueue;
|
||||
typedef queue<int> HandleCache;
|
||||
|
||||
/**
|
||||
* @return the Info object mapped to \a unique or a null pointer if analysis
|
||||
|
@ -164,22 +165,24 @@ protected:
|
|||
*/
|
||||
bool IsIgnored(const string& unique);
|
||||
|
||||
/**
|
||||
* @return whether file analysis is disabled for the given analyzer.
|
||||
*/
|
||||
static bool IsDisabled(AnalyzerTag::Tag tag);
|
||||
|
||||
/**
|
||||
* Queues \c get_file_handle event in order to retrieve unique file handle.
|
||||
* @return true if there is a handler for the event, else false.
|
||||
*/
|
||||
static bool QueueHandleEvent(AnalyzerTag::Tag tag, Connection* conn,
|
||||
bool is_orig);
|
||||
bool QueueHandleEvent(AnalyzerTag::Tag tag, Connection* conn,
|
||||
bool is_orig);
|
||||
|
||||
/**
|
||||
* @return whether file analysis is disabled for the given analyzer.
|
||||
*/
|
||||
static bool IsDisabled(AnalyzerTag::Tag tag);
|
||||
|
||||
StrMap str_map; /**< Map unique strings to \c FileAnalysis::Info records. */
|
||||
IDMap id_map; /**< Map file IDs to \c FileAnalysis::Info records. */
|
||||
StrSet ignored; /**< Ignored files. Will be finally removed on EOF. */
|
||||
PendingQueue pending; /**< Files awaiting a unique handle. */
|
||||
HandleCache cache; /**< The number of times a received file handle can be
|
||||
used to pop the #pending queue. */
|
||||
|
||||
static TableVal* disabled; /**< Table of disabled analyzers. */
|
||||
};
|
||||
|
|
|
@ -7,6 +7,10 @@
|
|||
|
||||
namespace file_analysis {
|
||||
|
||||
/**
|
||||
* Provides buffering for file contents until the script-layer is able to
|
||||
* return a unique file handle for it.
|
||||
*/
|
||||
class PendingFile {
|
||||
public:
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue