mirror of
https://github.com/zeek/zeek.git
synced 2025-10-06 16:48:19 +00:00
Extend file analysis API to allow file ID caching, adapt HTTP to use it.
This allows an analyzer to either provide file IDs associated with some file content or to cache a file ID that was already determined by script-layer logic, so that subsequent calls to the file analysis interface can bypass costly detours through the script layer. This can yield a decent performance improvement for analyzers that are able to take advantage of it and that deal with streaming content (like HTTP).
This commit is contained in:
parent
55a8725ce2
commit
1842d324cb
4 changed files with 95 additions and 40 deletions
|
@ -242,10 +242,11 @@ int HTTP_Entity::Undelivered(int64_t len)
|
|||
if ( end_of_data && in_header )
|
||||
return 0;
|
||||
|
||||
file_mgr->Gap(body_length, len,
|
||||
cached_file_id = file_mgr->Gap(body_length, len,
|
||||
http_message->MyHTTP_Analyzer()->GetAnalyzerTag(),
|
||||
http_message->MyHTTP_Analyzer()->Conn(),
|
||||
http_message->IsOrig());
|
||||
http_message->IsOrig(),
|
||||
cached_file_id);
|
||||
|
||||
if ( chunked_transfer_state != NON_CHUNKED_TRANSFER )
|
||||
{
|
||||
|
@ -314,15 +315,18 @@ void HTTP_Entity::SubmitData(int len, const char* buf)
|
|||
else
|
||||
{
|
||||
if ( send_size && content_length > 0 )
|
||||
file_mgr->SetSize(content_length,
|
||||
cached_file_id = file_mgr->SetSize(content_length,
|
||||
http_message->MyHTTP_Analyzer()->GetAnalyzerTag(),
|
||||
http_message->MyHTTP_Analyzer()->Conn(),
|
||||
http_message->IsOrig());
|
||||
http_message->IsOrig(),
|
||||
cached_file_id);
|
||||
|
||||
file_mgr->DataIn(reinterpret_cast<const u_char*>(buf), len,
|
||||
cached_file_id = file_mgr->DataIn(reinterpret_cast<const u_char*>(buf),
|
||||
len,
|
||||
http_message->MyHTTP_Analyzer()->GetAnalyzerTag(),
|
||||
http_message->MyHTTP_Analyzer()->Conn(),
|
||||
http_message->IsOrig());
|
||||
http_message->IsOrig(),
|
||||
cached_file_id);
|
||||
}
|
||||
|
||||
send_size = false;
|
||||
|
|
|
@ -64,6 +64,7 @@ protected:
|
|||
uint64_t offset;
|
||||
int64_t instance_length; // total length indicated by content-range
|
||||
bool send_size; // whether to send size indication to FAF
|
||||
std::string cached_file_id;
|
||||
|
||||
MIME_Entity* NewChildEntity() { return new HTTP_Entity(http_message, this, 1); }
|
||||
|
||||
|
|
|
@ -75,36 +75,47 @@ void Manager::SetHandle(const string& handle)
|
|||
current_file_id = HashHandle(handle);
|
||||
}
|
||||
|
||||
void Manager::DataIn(const u_char* data, uint64 len, uint64 offset,
|
||||
analyzer::Tag tag, Connection* conn, bool is_orig)
|
||||
string Manager::DataIn(const u_char* data, uint64 len, uint64 offset,
|
||||
analyzer::Tag tag, Connection* conn, bool is_orig,
|
||||
const string& cached_id)
|
||||
{
|
||||
GetFileHandle(tag, conn, is_orig);
|
||||
File* file = GetFile(current_file_id, conn, tag, is_orig);
|
||||
string id = cached_id.empty() ? GetFileID(tag, conn, is_orig) : cached_id;
|
||||
File* file = GetFile(id, conn, tag, is_orig);
|
||||
|
||||
if ( ! file )
|
||||
return;
|
||||
return "";
|
||||
|
||||
file->DataIn(data, len, offset);
|
||||
|
||||
if ( file->IsComplete() )
|
||||
{
|
||||
RemoveFile(file->GetID());
|
||||
return "";
|
||||
}
|
||||
|
||||
void Manager::DataIn(const u_char* data, uint64 len, analyzer::Tag tag,
|
||||
Connection* conn, bool is_orig)
|
||||
return id;
|
||||
}
|
||||
|
||||
string Manager::DataIn(const u_char* data, uint64 len, analyzer::Tag tag,
|
||||
Connection* conn, bool is_orig, const string& cached_id)
|
||||
{
|
||||
GetFileHandle(tag, conn, is_orig);
|
||||
string id = cached_id.empty() ? GetFileID(tag, conn, is_orig) : cached_id;
|
||||
// Sequential data input shouldn't be going over multiple conns, so don't
|
||||
// do the check to update connection set.
|
||||
File* file = GetFile(current_file_id, conn, tag, is_orig, false);
|
||||
File* file = GetFile(id, conn, tag, is_orig, false);
|
||||
|
||||
if ( ! file )
|
||||
return;
|
||||
return "";
|
||||
|
||||
file->DataIn(data, len);
|
||||
|
||||
if ( file->IsComplete() )
|
||||
{
|
||||
RemoveFile(file->GetID());
|
||||
return "";
|
||||
}
|
||||
|
||||
return id;
|
||||
}
|
||||
|
||||
void Manager::DataIn(const u_char* data, uint64 len, const string& file_id,
|
||||
|
@ -133,8 +144,7 @@ void Manager::EndOfFile(analyzer::Tag tag, Connection* conn)
|
|||
void Manager::EndOfFile(analyzer::Tag tag, Connection* conn, bool is_orig)
|
||||
{
|
||||
// Don't need to create a file if we're just going to remove it right away.
|
||||
GetFileHandle(tag, conn, is_orig);
|
||||
RemoveFile(current_file_id);
|
||||
RemoveFile(GetFileID(tag, conn, is_orig));
|
||||
}
|
||||
|
||||
void Manager::EndOfFile(const string& file_id)
|
||||
|
@ -142,31 +152,37 @@ void Manager::EndOfFile(const string& file_id)
|
|||
RemoveFile(file_id);
|
||||
}
|
||||
|
||||
void Manager::Gap(uint64 offset, uint64 len, analyzer::Tag tag,
|
||||
Connection* conn, bool is_orig)
|
||||
string Manager::Gap(uint64 offset, uint64 len, analyzer::Tag tag,
|
||||
Connection* conn, bool is_orig, const string& cached_id)
|
||||
{
|
||||
GetFileHandle(tag, conn, is_orig);
|
||||
File* file = GetFile(current_file_id, conn, tag, is_orig);
|
||||
string id = cached_id.empty() ? GetFileID(tag, conn, is_orig) : cached_id;
|
||||
File* file = GetFile(id, conn, tag, is_orig);
|
||||
|
||||
if ( ! file )
|
||||
return;
|
||||
return "";
|
||||
|
||||
file->Gap(offset, len);
|
||||
return id;
|
||||
}
|
||||
|
||||
void Manager::SetSize(uint64 size, analyzer::Tag tag, Connection* conn,
|
||||
bool is_orig)
|
||||
string Manager::SetSize(uint64 size, analyzer::Tag tag, Connection* conn,
|
||||
bool is_orig, const string& cached_id)
|
||||
{
|
||||
GetFileHandle(tag, conn, is_orig);
|
||||
File* file = GetFile(current_file_id, conn, tag, is_orig);
|
||||
string id = cached_id.empty() ? GetFileID(tag, conn, is_orig) : cached_id;
|
||||
File* file = GetFile(id, conn, tag, is_orig);
|
||||
|
||||
if ( ! file )
|
||||
return;
|
||||
return "";
|
||||
|
||||
file->SetTotalBytes(size);
|
||||
|
||||
if ( file->IsComplete() )
|
||||
{
|
||||
RemoveFile(file->GetID());
|
||||
return "";
|
||||
}
|
||||
|
||||
return id;
|
||||
}
|
||||
|
||||
bool Manager::SetTimeoutInterval(const string& file_id, double interval) const
|
||||
|
@ -317,15 +333,15 @@ bool Manager::IsIgnored(const string& file_id)
|
|||
return ignored.find(file_id) != ignored.end();
|
||||
}
|
||||
|
||||
void Manager::GetFileHandle(analyzer::Tag tag, Connection* c, bool is_orig)
|
||||
string Manager::GetFileID(analyzer::Tag tag, Connection* c, bool is_orig)
|
||||
{
|
||||
current_file_id.clear();
|
||||
|
||||
if ( IsDisabled(tag) )
|
||||
return;
|
||||
return "";
|
||||
|
||||
if ( ! get_file_handle )
|
||||
return;
|
||||
return "";
|
||||
|
||||
EnumVal* tagval = tag.AsEnumVal();
|
||||
Ref(tagval);
|
||||
|
@ -337,6 +353,7 @@ void Manager::GetFileHandle(analyzer::Tag tag, Connection* c, bool is_orig)
|
|||
|
||||
mgr.QueueEvent(get_file_handle, vl);
|
||||
mgr.Drain(); // need file handle immediately so we don't have to buffer data
|
||||
return current_file_id;
|
||||
}
|
||||
|
||||
bool Manager::IsDisabled(analyzer::Tag tag)
|
||||
|
|
|
@ -82,9 +82,17 @@ public:
|
|||
* @param conn network connection over which the file data is transferred.
|
||||
* @param is_orig true if the file is being sent from connection originator
|
||||
* or false if it is being sent in the opposite direction.
|
||||
* @param cached_file_id may be set to a previous return value in order to
|
||||
* bypass costly file handle lookups.
|
||||
* @return a unique file ID string which, in certain contexts, may be
|
||||
* cached and passed back in to a subsequent function call in order
|
||||
* to avoid costly file handle lookups (which have to go through
|
||||
* the \c get_file_handle script-layer event). An empty string
|
||||
* indicates the associated file is not going to be analyzed further.
|
||||
*/
|
||||
void DataIn(const u_char* data, uint64 len, uint64 offset,
|
||||
analyzer::Tag tag, Connection* conn, bool is_orig);
|
||||
std::string DataIn(const u_char* data, uint64 len, uint64 offset,
|
||||
analyzer::Tag tag, Connection* conn, bool is_orig,
|
||||
const std::string& cached_file_id = "");
|
||||
|
||||
/**
|
||||
* Pass in sequential file data.
|
||||
|
@ -94,9 +102,17 @@ public:
|
|||
* @param conn network connection over which the file data is transferred.
|
||||
* @param is_orig true if the file is being sent from connection originator
|
||||
* or false if it is being sent in the opposite direction.
|
||||
* @param cached_file_id may be set to a previous return value in order to
|
||||
* bypass costly file handle lookups.
|
||||
* @return a unique file ID string which, in certain contexts, may be
|
||||
* cached and passed back in to a subsequent function call in order
|
||||
* to avoid costly file handle lookups (which have to go through
|
||||
* the \c get_file_handle script-layer event). An empty string
|
||||
* indicates the associated file is not going to be analyzed further.
|
||||
*/
|
||||
void DataIn(const u_char* data, uint64 len, analyzer::Tag tag,
|
||||
Connection* conn, bool is_orig);
|
||||
std::string DataIn(const u_char* data, uint64 len, analyzer::Tag tag,
|
||||
Connection* conn, bool is_orig,
|
||||
const std::string& cached_file_id = "");
|
||||
|
||||
/**
|
||||
* Pass in sequential file data from external source (e.g. input framework).
|
||||
|
@ -140,9 +156,17 @@ public:
|
|||
* @param conn network connection over which the file data is transferred.
|
||||
* @param is_orig true if the file is being sent from connection originator
|
||||
* or false if it is being sent in the opposite direction.
|
||||
* @param cached_file_id may be set to a previous return value in order to
|
||||
* bypass costly file handle lookups.
|
||||
* @return a unique file ID string which, in certain contexts, may be
|
||||
* cached and passed back in to a subsequent function call in order
|
||||
* to avoid costly file handle lookups (which have to go through
|
||||
* the \c get_file_handle script-layer event). An empty string
|
||||
* indicates the associated file is not going to be analyzed further.
|
||||
*/
|
||||
void Gap(uint64 offset, uint64 len, analyzer::Tag tag, Connection* conn,
|
||||
bool is_orig);
|
||||
std::string Gap(uint64 offset, uint64 len, analyzer::Tag tag,
|
||||
Connection* conn, bool is_orig,
|
||||
const std::string& cached_file_id = "");
|
||||
|
||||
/**
|
||||
* Provide the expected number of bytes that comprise a file.
|
||||
|
@ -151,9 +175,16 @@ public:
|
|||
* @param conn network connection over which the file data is transferred.
|
||||
* @param is_orig true if the file is being sent from connection originator
|
||||
* or false if it is being sent in the opposite direction.
|
||||
* @param cached_file_id may be set to a previous return value in order to
|
||||
* bypass costly file handle lookups.
|
||||
* @return a unique file ID string which, in certain contexts, may be
|
||||
* cached and passed back in to a subsequent function call in order
|
||||
* to avoid costly file handle lookups (which have to go through
|
||||
* the \c get_file_handle script-layer event). An empty string
|
||||
* indicates the associated file is not going to be analyzed further.
|
||||
*/
|
||||
void SetSize(uint64 size, analyzer::Tag tag, Connection* conn,
|
||||
bool is_orig);
|
||||
std::string SetSize(uint64 size, analyzer::Tag tag, Connection* conn,
|
||||
bool is_orig, const std::string& cached_file_id = "");
|
||||
|
||||
/**
|
||||
* Starts ignoring a file, which will finally be removed from internal
|
||||
|
@ -283,8 +314,10 @@ protected:
|
|||
* @param conn network connection over which the file is transferred.
|
||||
* @param is_orig true if the file is being sent from connection originator
|
||||
* or false if it is being sent in the opposite direction.
|
||||
* @return #current_file_id, which is a hash of a unique file handle string
|
||||
* set by a \c get_file_handle event handler.
|
||||
*/
|
||||
void GetFileHandle(analyzer::Tag tag, Connection* c, bool is_orig);
|
||||
std::string GetFileID(analyzer::Tag tag, Connection* c, bool is_orig);
|
||||
|
||||
/**
|
||||
* Check if analysis is available for files transferred over a given
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue