Mirror of https://github.com/zeek/zeek.git
FileAnalysis: optimize get_file_handle event queueing.
When a file handle is needed and the last event in the queue is already a get_file_handle event with the same arguments, no new event is queued; instead, the handle produced by that earlier event is cached and re-used. This relies on get_file_handle handlers not modifying global state that is also used to derive the file handle string.
commit fc267d010d
parent 390358b70c

10 changed files with 88 additions and 37 deletions
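
Before the diff, a minimal, self-contained C++ sketch of the optimization described above may help: it models get_file_handle requests as plain structs, coalesces a request whose arguments match the still-queued tail event by bumping that event's use count, and then drains the queue, re-using each event's handle for as many pending files as were coalesced onto it. Every type and name in the sketch (HandleRequest, event_queue, use_counts) is a hypothetical stand-in, not Zeek's actual EventMgr/Manager API.

// Hypothetical stand-alone illustration (not Zeek code): coalesce duplicate
// file-handle requests onto the still-queued tail event via a use count.
#include <iostream>
#include <queue>
#include <string>
#include <vector>

struct HandleRequest
	{
	std::string analyzer;  // stand-in for the analyzer tag argument
	std::string conn_id;   // stand-in for the connection-id argument
	bool is_orig;
	};

int main()
	{
	std::queue<HandleRequest> event_queue; // queued "get_file_handle" events
	std::queue<int> use_counts;            // per-event count of pending files served
	HandleRequest tail{};                  // arguments of the most recently queued event
	int pending_files = 0;

	std::vector<HandleRequest> requests = {
		{ "HTTP", "10.0.0.1:1234-10.0.0.2:80", false },
		{ "HTTP", "10.0.0.1:1234-10.0.0.2:80", false }, // same args: coalesced
		{ "FTP_DATA", "10.0.0.1:2000-10.0.0.2:20", false },
	};

	for ( const auto& r : requests )
		{
		++pending_files;

		if ( ! event_queue.empty() && tail.analyzer == r.analyzer &&
		     tail.conn_id == r.conn_id && tail.is_orig == r.is_orig )
			// The queue's tail already asks for this handle: remember to
			// re-use its result instead of queueing another event.
			++use_counts.back();
		else
			{
			event_queue.push(r);
			use_counts.push(1);
			tail = r;
			}
		}

	// Drain: each event produces one handle, re-used for the coalesced requests.
	while ( ! event_queue.empty() )
		{
		HandleRequest ev = event_queue.front();
		event_queue.pop();

		std::string handle = ev.analyzer + " " + ev.conn_id; // toy handle derivation
		int uses = use_counts.front();
		use_counts.pop();

		for ( int i = 0; i < uses; ++i )
			{
			--pending_files;
			std::cout << "file handle: " << handle << "\n";
			}
		}

	std::cout << "pending files left: " << pending_files << "\n";
	return 0;
	}

The real change keeps a similar bookkeeping split: the event manager only exposes its tail event and arguments, while the file-analysis manager owns the per-event use counts (the HandleCache queue) and pops one pending file per use when the handle arrives.
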
@@ -18,9 +18,6 @@ export {
 
 	local info: FTP::Info = ftp_data_expected[c$id$resp_h, c$id$resp_p];
 
-	local rval = fmt("%s %s %s", ANALYZER_FTP_DATA, c$start_time,
-	                 id_string(c$id));
-
 	if ( info$passive )
 		# FTP client initiates data channel.
 		if ( is_orig )
@@ -28,12 +25,14 @@ export {
 			return "";
 		else
 			# Do care about FTP server data.
-			return rval;
+			return cat(ANALYZER_FTP_DATA, " ", c$start_time, " ",
+			           id_string(c$id));
 	else
 		# FTP server initiates dta channel.
 		if ( is_orig )
 			# Do care about FTP server data.
-			return rval;
+			return cat(ANALYZER_FTP_DATA, " ", c$start_time, " ",
+			           id_string(c$id));
 		else
 			# Don't care about FTP client data.
 			return "";

@@ -18,11 +18,11 @@ export {
 	if ( ! c?$http ) return "";
 
 	if ( c$http$range_request )
-		return fmt("%s %s %s %s", ANALYZER_HTTP, is_orig, c$id$orig_h,
-		           build_url(c$http));
+		return cat(ANALYZER_HTTP, " ", is_orig, " ", c$id$orig_h,
+		           " ", build_url(c$http));
 
-	return fmt("%s %s %s %s %s", ANALYZER_HTTP, c$start_time, is_orig,
-	           c$http$trans_depth, id_string(c$id));
+	return cat(ANALYZER_HTTP, " ", c$start_time, " ", is_orig,
+	           " ", c$http$trans_depth, " ", id_string(c$id));
 	}
 }

@@ -15,7 +15,7 @@ export {
 function get_file_handle(c: connection, is_orig: bool): string
 	{
 	if ( is_orig ) return "";
-	return fmt("%s %s %s", ANALYZER_IRC_DATA, c$start_time,
+	return cat(ANALYZER_IRC_DATA, " ", c$start_time, " ",
 	           id_string(c$id));
 	}
 }

@@ -17,8 +17,8 @@ export {
 	{
 	if ( ! c?$smtp ) return "";
 
-	return fmt("%s %s %s %s", ANALYZER_SMTP, c$start_time,
-	           c$smtp$trans_depth, c$smtp_state$mime_level);
+	return cat(ANALYZER_SMTP, " ", c$start_time, " ",
+	           c$smtp$trans_depth, " ", c$smtp_state$mime_level);
 	}
 }

@@ -118,6 +118,9 @@ public:
 
 	void Describe(ODesc* d) const;
 
+	EventHandlerPtr Tail()	{ return tail ? tail->handler : EventHandlerPtr(); }
+	val_list* TailArgs()	{ return tail ? tail->args : 0; }
+
 protected:
 	void QueueEvent(Event* event);

@@ -6986,7 +6986,8 @@ event bro_script_loaded%(path: string, level: count%);
 ## belongs. All incoming data to the framework is buffered, and depends
 ## on a handler for this event to return a string value that uniquely
 ## identifies a file. Among all handlers of this event, exactly one must
-## call :bro:see:`return_file_handle`.
+## call :bro:see:`return_file_handle`. Handlers of this event must not
+## change any global state.
 ##
 ## tag: The analyzer which is carrying the file data.
 ##

@@ -185,6 +185,13 @@ double Info::GetTimeoutInterval() const
 	return LookupFieldDefaultInterval(timeout_interval_idx);
 	}
 
+string Info::GetSource() const
+	{
+	Val* v = val->Lookup(source_idx);
+	if ( ! v ) return "";
+	return v->AsStringVal()->CheckString();
+	}
+
 RecordVal* Info::GetResults(RecordVal* args) const
 	{
 	TableVal* actions_table = val->Lookup(actions_idx)->AsTableVal();
@@ -243,18 +250,6 @@ bool Info::BufferBOF(const u_char* data, uint64 len)
 
 	uint64 desired_size = LookupFieldDefaultCount(bof_buffer_size_idx);
 
-	/* Leaving out this optimization (I think) for now to keep things simpler.
-	// If first chunk satisfies desired size, do everything now without copying.
-	if ( bof_buffer.chunks.empty() && len >= desired_size )
-		{
-		bof_buffer.full = bof_buffer.replayed = true;
-		val->Assign(bof_buffer_idx, new StringVal(new BroString(data, len, 0)));
-		file_mgr->EvaluatePolicy(TRIGGER_BOF_BUFFER, this);
-		// TODO: libmagic stuff
-		return false;
-		}
-	*/
-
 	bof_buffer.chunks.push_back(new BroString(data, len, 0));
 	bof_buffer.size += len;

@@ -32,6 +32,11 @@ public:
 	 */
 	double GetTimeoutInterval() const;
 
+	/**
+	 * @return value of the "source" field from #val record.
+	 */
+	string GetSource() const;
+
 	/**
 	 * @return value of the "file_id" field from #val record.
 	 */

@@ -34,11 +34,17 @@ void Manager::ReceiveHandle(const string& handle)
 	if ( pending.empty() )
 		reporter->InternalError("File analysis underflow");
 
-	PendingFile* pf = pending.front();
-	if ( ! handle.empty() )
-		pf->Finish(handle);
-	delete pf;
-	pending.pop();
+	int use_count = cache.front();
+	cache.pop();
+
+	for ( int i = 0; i < use_count; ++i )
+		{
+		PendingFile* pf = pending.front();
+		if ( ! handle.empty() )
+			pf->Finish(handle);
+		delete pf;
+		pending.pop();
+		}
 	}
 
 void Manager::EventDrainDone()
@@ -330,11 +336,50 @@ bool Manager::IsDisabled(AnalyzerTag::Tag tag)
 	return rval;
 	}
 
+static bool CheckArgEquality(AnalyzerTag::Tag tag, Connection* conn,
+                             bool is_orig, val_list* other_args)
+	{
+	if ( ! other_args ) return false;
+	if ( (*other_args)[0]->AsCount() != (bro_uint_t) tag ) return false;
+	if ( (*other_args)[2]->AsBool() != is_orig ) return false;
+
+	RecordVal* id = (*other_args)[1]->AsRecordVal()->Lookup(
+	                connection_type->FieldOffset("id"))->AsRecordVal();
+
+	PortVal* orig_p = id->Lookup(
+	                  conn_id->FieldOffset("orig_p"))->AsPortVal();
+
+	if ( orig_p->Port() != ntohs(conn->OrigPort()) ) return false;
+	if ( orig_p->PortType() != conn->ConnTransport() ) return false;
+
+	PortVal* resp_p = id->Lookup(
+	                  conn_id->FieldOffset("resp_p"))->AsPortVal();
+
+	if ( resp_p->Port() != ntohs(conn->RespPort()) ) return false;
+
+	if ( id->Lookup(conn_id->FieldOffset("orig_h"))->AsAddr() !=
+	     conn->OrigAddr() ) return false;
+
+	if ( id->Lookup(conn_id->FieldOffset("resp_h"))->AsAddr() !=
+	     conn->RespAddr() ) return false;
+
+	return true;
+	}
+
 bool Manager::QueueHandleEvent(AnalyzerTag::Tag tag, Connection* conn,
                                bool is_orig)
 	{
 	if ( ! get_file_handle ) return false;
 
+	if ( mgr.Tail() == get_file_handle &&
+	     CheckArgEquality(tag, conn, is_orig, mgr.TailArgs()) )
+		{
+		cache.front()++;
+		return true;
+		}
+
+	cache.push(1);
+
 	val_list* vl = new val_list();
 	vl->append(new Val(tag, TYPE_COUNT));
 	vl->append(conn->BuildConnVal());

@@ -130,6 +130,7 @@ protected:
 	typedef set<string> StrSet;
 	typedef map<FileID, Info*> IDMap;
 	typedef queue<PendingFile*> PendingQueue;
+	typedef queue<int> HandleCache;
 
 	/**
 	 * @return the Info object mapped to \a unique or a null pointer if analysis
@@ -164,22 +165,24 @@ protected:
 	 */
 	bool IsIgnored(const string& unique);
 
-	/**
-	 * @return whether file analysis is disabled for the given analyzer.
-	 */
-	static bool IsDisabled(AnalyzerTag::Tag tag);
-
 	/**
 	 * Queues \c get_file_handle event in order to retrieve unique file handle.
 	 * @return true if there is a handler for the event, else false.
 	 */
-	static bool QueueHandleEvent(AnalyzerTag::Tag tag, Connection* conn,
+	bool QueueHandleEvent(AnalyzerTag::Tag tag, Connection* conn,
 	                             bool is_orig);
 
+	/**
+	 * @return whether file analysis is disabled for the given analyzer.
+	 */
+	static bool IsDisabled(AnalyzerTag::Tag tag);
+
 	StrMap str_map;	/**< Map unique strings to \c FileAnalysis::Info records. */
 	IDMap id_map;	/**< Map file IDs to \c FileAnalysis::Info records. */
 	StrSet ignored;	/**< Ignored files. Will be finally removed on EOF. */
 	PendingQueue pending;	/**< Files awaiting a unique handle. */
+	HandleCache cache;	/**< The number of times a received file handle can be
+				     used to pop the #pending queue. */
 
 	static TableVal* disabled;	/**< Table of disabled analyzers. */
 };