mirror of
https://github.com/zeek/zeek.git
synced 2025-10-04 15:48:19 +00:00
Merge remote-tracking branch 'origin/topic/jsiwek/http-file-id-caching'
* origin/topic/jsiwek/http-file-id-caching: Revert use of HTTP file ID caching for gaps range request content. Extend file analysis API to allow file ID caching, adapt HTTP to use it. BIT-1125 #merged
This commit is contained in:
commit
d4b5da1597
8 changed files with 122 additions and 44 deletions
11
CHANGES
11
CHANGES
|
@ -1,4 +1,15 @@
|
||||||
|
|
||||||
|
2.2-131 | 2014-01-30 16:11:11 -0800
|
||||||
|
|
||||||
|
* Extend file analysis API to allow file ID caching. This allows an
|
||||||
|
analyzer to either provide file IDs associated with some file
|
||||||
|
content or to cache a file ID that was already determined by
|
||||||
|
script-layer logic so that subsequent calls to the file analysis
|
||||||
|
interface can bypass costly detours through the script layer. This
|
||||||
|
can yield a decent performance improvement for analyzers that are
|
||||||
|
able to take advantage of it and deal with streaming content (like
|
||||||
|
HTTP, which has been adapted accordingly). (Jon Siwek)
|
||||||
|
|
||||||
2.2-128 | 2014-01-30 15:58:47 -0800
|
2.2-128 | 2014-01-30 15:58:47 -0800
|
||||||
|
|
||||||
* Add leak test for Exec module. (Bernhard Amann)
|
* Add leak test for Exec module. (Bernhard Amann)
|
||||||
|
|
6
NEWS
6
NEWS
|
@ -41,6 +41,12 @@ Changed Functionality
|
||||||
|
|
||||||
event x509_extension(c: connection, is_orig: bool, cert: X509, ext: X509_extension_info);
|
event x509_extension(c: connection, is_orig: bool, cert: X509, ext: X509_extension_info);
|
||||||
|
|
||||||
|
- Bro no longer special-cases SYN/FIN/RST-filtered traces by not
|
||||||
|
reporting missing data. The old behavior can be reverted by
|
||||||
|
redef'ing "detect_filtered_trace".
|
||||||
|
|
||||||
|
TODO: Update if we add a detector for filtered traces.
|
||||||
|
|
||||||
Bro 2.2
|
Bro 2.2
|
||||||
=======
|
=======
|
||||||
|
|
||||||
|
|
2
VERSION
2
VERSION
|
@ -1 +1 @@
|
||||||
2.2-128
|
2.2-131
|
||||||
|
|
|
@ -1 +1 @@
|
||||||
Subproject commit 58b9c8bf762024136ec2c9bbcea16d417282af8d
|
Subproject commit 36b96eb9c13d1011bbc8be3581fd0f1c0bd8de44
|
|
@ -242,10 +242,17 @@ int HTTP_Entity::Undelivered(int64_t len)
|
||||||
if ( end_of_data && in_header )
|
if ( end_of_data && in_header )
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
file_mgr->Gap(body_length, len,
|
if ( is_partial_content )
|
||||||
http_message->MyHTTP_Analyzer()->GetAnalyzerTag(),
|
file_mgr->Gap(body_length, len,
|
||||||
http_message->MyHTTP_Analyzer()->Conn(),
|
http_message->MyHTTP_Analyzer()->GetAnalyzerTag(),
|
||||||
http_message->IsOrig());
|
http_message->MyHTTP_Analyzer()->Conn(),
|
||||||
|
http_message->IsOrig());
|
||||||
|
else
|
||||||
|
precomputed_file_id = file_mgr->Gap(body_length, len,
|
||||||
|
http_message->MyHTTP_Analyzer()->GetAnalyzerTag(),
|
||||||
|
http_message->MyHTTP_Analyzer()->Conn(),
|
||||||
|
http_message->IsOrig(),
|
||||||
|
precomputed_file_id);
|
||||||
|
|
||||||
if ( chunked_transfer_state != NON_CHUNKED_TRANSFER )
|
if ( chunked_transfer_state != NON_CHUNKED_TRANSFER )
|
||||||
{
|
{
|
||||||
|
@ -314,15 +321,18 @@ void HTTP_Entity::SubmitData(int len, const char* buf)
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if ( send_size && content_length > 0 )
|
if ( send_size && content_length > 0 )
|
||||||
file_mgr->SetSize(content_length,
|
precomputed_file_id = file_mgr->SetSize(content_length,
|
||||||
http_message->MyHTTP_Analyzer()->GetAnalyzerTag(),
|
http_message->MyHTTP_Analyzer()->GetAnalyzerTag(),
|
||||||
http_message->MyHTTP_Analyzer()->Conn(),
|
http_message->MyHTTP_Analyzer()->Conn(),
|
||||||
http_message->IsOrig());
|
http_message->IsOrig(),
|
||||||
|
precomputed_file_id);
|
||||||
|
|
||||||
file_mgr->DataIn(reinterpret_cast<const u_char*>(buf), len,
|
precomputed_file_id = file_mgr->DataIn(reinterpret_cast<const u_char*>(buf),
|
||||||
|
len,
|
||||||
http_message->MyHTTP_Analyzer()->GetAnalyzerTag(),
|
http_message->MyHTTP_Analyzer()->GetAnalyzerTag(),
|
||||||
http_message->MyHTTP_Analyzer()->Conn(),
|
http_message->MyHTTP_Analyzer()->Conn(),
|
||||||
http_message->IsOrig());
|
http_message->IsOrig(),
|
||||||
|
precomputed_file_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
send_size = false;
|
send_size = false;
|
||||||
|
|
|
@ -64,6 +64,7 @@ protected:
|
||||||
uint64_t offset;
|
uint64_t offset;
|
||||||
int64_t instance_length; // total length indicated by content-range
|
int64_t instance_length; // total length indicated by content-range
|
||||||
bool send_size; // whether to send size indication to FAF
|
bool send_size; // whether to send size indication to FAF
|
||||||
|
std::string precomputed_file_id;
|
||||||
|
|
||||||
MIME_Entity* NewChildEntity() { return new HTTP_Entity(http_message, this, 1); }
|
MIME_Entity* NewChildEntity() { return new HTTP_Entity(http_message, this, 1); }
|
||||||
|
|
||||||
|
|
|
@ -75,36 +75,47 @@ void Manager::SetHandle(const string& handle)
|
||||||
current_file_id = HashHandle(handle);
|
current_file_id = HashHandle(handle);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Manager::DataIn(const u_char* data, uint64 len, uint64 offset,
|
string Manager::DataIn(const u_char* data, uint64 len, uint64 offset,
|
||||||
analyzer::Tag tag, Connection* conn, bool is_orig)
|
analyzer::Tag tag, Connection* conn, bool is_orig,
|
||||||
|
const string& precomputed_id)
|
||||||
{
|
{
|
||||||
GetFileHandle(tag, conn, is_orig);
|
string id = precomputed_id.empty() ? GetFileID(tag, conn, is_orig) : precomputed_id;
|
||||||
File* file = GetFile(current_file_id, conn, tag, is_orig);
|
File* file = GetFile(id, conn, tag, is_orig);
|
||||||
|
|
||||||
if ( ! file )
|
if ( ! file )
|
||||||
return;
|
return "";
|
||||||
|
|
||||||
file->DataIn(data, len, offset);
|
file->DataIn(data, len, offset);
|
||||||
|
|
||||||
if ( file->IsComplete() )
|
if ( file->IsComplete() )
|
||||||
|
{
|
||||||
RemoveFile(file->GetID());
|
RemoveFile(file->GetID());
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
return id;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Manager::DataIn(const u_char* data, uint64 len, analyzer::Tag tag,
|
string Manager::DataIn(const u_char* data, uint64 len, analyzer::Tag tag,
|
||||||
Connection* conn, bool is_orig)
|
Connection* conn, bool is_orig, const string& precomputed_id)
|
||||||
{
|
{
|
||||||
GetFileHandle(tag, conn, is_orig);
|
string id = precomputed_id.empty() ? GetFileID(tag, conn, is_orig) : precomputed_id;
|
||||||
// Sequential data input shouldn't be going over multiple conns, so don't
|
// Sequential data input shouldn't be going over multiple conns, so don't
|
||||||
// do the check to update connection set.
|
// do the check to update connection set.
|
||||||
File* file = GetFile(current_file_id, conn, tag, is_orig, false);
|
File* file = GetFile(id, conn, tag, is_orig, false);
|
||||||
|
|
||||||
if ( ! file )
|
if ( ! file )
|
||||||
return;
|
return "";
|
||||||
|
|
||||||
file->DataIn(data, len);
|
file->DataIn(data, len);
|
||||||
|
|
||||||
if ( file->IsComplete() )
|
if ( file->IsComplete() )
|
||||||
|
{
|
||||||
RemoveFile(file->GetID());
|
RemoveFile(file->GetID());
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
return id;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Manager::DataIn(const u_char* data, uint64 len, const string& file_id,
|
void Manager::DataIn(const u_char* data, uint64 len, const string& file_id,
|
||||||
|
@ -133,8 +144,7 @@ void Manager::EndOfFile(analyzer::Tag tag, Connection* conn)
|
||||||
void Manager::EndOfFile(analyzer::Tag tag, Connection* conn, bool is_orig)
|
void Manager::EndOfFile(analyzer::Tag tag, Connection* conn, bool is_orig)
|
||||||
{
|
{
|
||||||
// Don't need to create a file if we're just going to remove it right away.
|
// Don't need to create a file if we're just going to remove it right away.
|
||||||
GetFileHandle(tag, conn, is_orig);
|
RemoveFile(GetFileID(tag, conn, is_orig));
|
||||||
RemoveFile(current_file_id);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Manager::EndOfFile(const string& file_id)
|
void Manager::EndOfFile(const string& file_id)
|
||||||
|
@ -142,31 +152,37 @@ void Manager::EndOfFile(const string& file_id)
|
||||||
RemoveFile(file_id);
|
RemoveFile(file_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Manager::Gap(uint64 offset, uint64 len, analyzer::Tag tag,
|
string Manager::Gap(uint64 offset, uint64 len, analyzer::Tag tag,
|
||||||
Connection* conn, bool is_orig)
|
Connection* conn, bool is_orig, const string& precomputed_id)
|
||||||
{
|
{
|
||||||
GetFileHandle(tag, conn, is_orig);
|
string id = precomputed_id.empty() ? GetFileID(tag, conn, is_orig) : precomputed_id;
|
||||||
File* file = GetFile(current_file_id, conn, tag, is_orig);
|
File* file = GetFile(id, conn, tag, is_orig);
|
||||||
|
|
||||||
if ( ! file )
|
if ( ! file )
|
||||||
return;
|
return "";
|
||||||
|
|
||||||
file->Gap(offset, len);
|
file->Gap(offset, len);
|
||||||
|
return id;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Manager::SetSize(uint64 size, analyzer::Tag tag, Connection* conn,
|
string Manager::SetSize(uint64 size, analyzer::Tag tag, Connection* conn,
|
||||||
bool is_orig)
|
bool is_orig, const string& precomputed_id)
|
||||||
{
|
{
|
||||||
GetFileHandle(tag, conn, is_orig);
|
string id = precomputed_id.empty() ? GetFileID(tag, conn, is_orig) : precomputed_id;
|
||||||
File* file = GetFile(current_file_id, conn, tag, is_orig);
|
File* file = GetFile(id, conn, tag, is_orig);
|
||||||
|
|
||||||
if ( ! file )
|
if ( ! file )
|
||||||
return;
|
return "";
|
||||||
|
|
||||||
file->SetTotalBytes(size);
|
file->SetTotalBytes(size);
|
||||||
|
|
||||||
if ( file->IsComplete() )
|
if ( file->IsComplete() )
|
||||||
|
{
|
||||||
RemoveFile(file->GetID());
|
RemoveFile(file->GetID());
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
return id;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Manager::SetTimeoutInterval(const string& file_id, double interval) const
|
bool Manager::SetTimeoutInterval(const string& file_id, double interval) const
|
||||||
|
@ -317,15 +333,15 @@ bool Manager::IsIgnored(const string& file_id)
|
||||||
return ignored.find(file_id) != ignored.end();
|
return ignored.find(file_id) != ignored.end();
|
||||||
}
|
}
|
||||||
|
|
||||||
void Manager::GetFileHandle(analyzer::Tag tag, Connection* c, bool is_orig)
|
string Manager::GetFileID(analyzer::Tag tag, Connection* c, bool is_orig)
|
||||||
{
|
{
|
||||||
current_file_id.clear();
|
current_file_id.clear();
|
||||||
|
|
||||||
if ( IsDisabled(tag) )
|
if ( IsDisabled(tag) )
|
||||||
return;
|
return "";
|
||||||
|
|
||||||
if ( ! get_file_handle )
|
if ( ! get_file_handle )
|
||||||
return;
|
return "";
|
||||||
|
|
||||||
EnumVal* tagval = tag.AsEnumVal();
|
EnumVal* tagval = tag.AsEnumVal();
|
||||||
Ref(tagval);
|
Ref(tagval);
|
||||||
|
@ -337,6 +353,7 @@ void Manager::GetFileHandle(analyzer::Tag tag, Connection* c, bool is_orig)
|
||||||
|
|
||||||
mgr.QueueEvent(get_file_handle, vl);
|
mgr.QueueEvent(get_file_handle, vl);
|
||||||
mgr.Drain(); // need file handle immediately so we don't have to buffer data
|
mgr.Drain(); // need file handle immediately so we don't have to buffer data
|
||||||
|
return current_file_id;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Manager::IsDisabled(analyzer::Tag tag)
|
bool Manager::IsDisabled(analyzer::Tag tag)
|
||||||
|
|
|
@ -82,9 +82,17 @@ public:
|
||||||
* @param conn network connection over which the file data is transferred.
|
* @param conn network connection over which the file data is transferred.
|
||||||
* @param is_orig true if the file is being sent from connection originator
|
* @param is_orig true if the file is being sent from connection originator
|
||||||
* or false if it is being sent in the opposite direction.
|
* or false if it is being sent in the opposite direction.
|
||||||
|
* @param precomputed_file_id may be set to a previous return value in order to
|
||||||
|
* bypass costly file handle lookups.
|
||||||
|
* @return a unique file ID string which, in certain contexts, may be
|
||||||
|
* cached and passed back in to a subsequent function call in order
|
||||||
|
* to avoid costly file handle lookups (which have to go through
|
||||||
|
* the \c get_file_handle script-layer event). An empty string
|
||||||
|
* indicates the associated file is not going to be analyzed further.
|
||||||
*/
|
*/
|
||||||
void DataIn(const u_char* data, uint64 len, uint64 offset,
|
std::string DataIn(const u_char* data, uint64 len, uint64 offset,
|
||||||
analyzer::Tag tag, Connection* conn, bool is_orig);
|
analyzer::Tag tag, Connection* conn, bool is_orig,
|
||||||
|
const std::string& precomputed_file_id = "");
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Pass in sequential file data.
|
* Pass in sequential file data.
|
||||||
|
@ -94,9 +102,17 @@ public:
|
||||||
* @param conn network connection over which the file data is transferred.
|
* @param conn network connection over which the file data is transferred.
|
||||||
* @param is_orig true if the file is being sent from connection originator
|
* @param is_orig true if the file is being sent from connection originator
|
||||||
* or false if it is being sent in the opposite direction.
|
* or false if it is being sent in the opposite direction.
|
||||||
|
* @param precomputed_file_id may be set to a previous return value in order to
|
||||||
|
* bypass costly file handle lookups.
|
||||||
|
* @return a unique file ID string which, in certain contexts, may be
|
||||||
|
* cached and passed back in to a subsequent function call in order
|
||||||
|
* to avoid costly file handle lookups (which have to go through
|
||||||
|
* the \c get_file_handle script-layer event). An empty string
|
||||||
|
* indicates the associated file is not going to be analyzed further.
|
||||||
*/
|
*/
|
||||||
void DataIn(const u_char* data, uint64 len, analyzer::Tag tag,
|
std::string DataIn(const u_char* data, uint64 len, analyzer::Tag tag,
|
||||||
Connection* conn, bool is_orig);
|
Connection* conn, bool is_orig,
|
||||||
|
const std::string& precomputed_file_id = "");
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Pass in sequential file data from external source (e.g. input framework).
|
* Pass in sequential file data from external source (e.g. input framework).
|
||||||
|
@ -140,9 +156,17 @@ public:
|
||||||
* @param conn network connection over which the file data is transferred.
|
* @param conn network connection over which the file data is transferred.
|
||||||
* @param is_orig true if the file is being sent from connection originator
|
* @param is_orig true if the file is being sent from connection originator
|
||||||
* or false if it is being sent in the opposite direction.
|
* or false if it is being sent in the opposite direction.
|
||||||
|
* @param precomputed_file_id may be set to a previous return value in order to
|
||||||
|
* bypass costly file handle lookups.
|
||||||
|
* @return a unique file ID string which, in certain contexts, may be
|
||||||
|
* cached and passed back in to a subsequent function call in order
|
||||||
|
* to avoid costly file handle lookups (which have to go through
|
||||||
|
* the \c get_file_handle script-layer event). An empty string
|
||||||
|
* indicates the associated file is not going to be analyzed further.
|
||||||
*/
|
*/
|
||||||
void Gap(uint64 offset, uint64 len, analyzer::Tag tag, Connection* conn,
|
std::string Gap(uint64 offset, uint64 len, analyzer::Tag tag,
|
||||||
bool is_orig);
|
Connection* conn, bool is_orig,
|
||||||
|
const std::string& precomputed_file_id = "");
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Provide the expected number of bytes that comprise a file.
|
* Provide the expected number of bytes that comprise a file.
|
||||||
|
@ -151,9 +175,16 @@ public:
|
||||||
* @param conn network connection over which the file data is transferred.
|
* @param conn network connection over which the file data is transferred.
|
||||||
* @param is_orig true if the file is being sent from connection originator
|
* @param is_orig true if the file is being sent from connection originator
|
||||||
* or false if it is being sent in the opposite direction.
|
* or false if it is being sent in the opposite direction.
|
||||||
|
* @param precomputed_file_id may be set to a previous return value in order to
|
||||||
|
* bypass costly file handle lookups.
|
||||||
|
* @return a unique file ID string which, in certain contexts, may be
|
||||||
|
* cached and passed back in to a subsequent function call in order
|
||||||
|
* to avoid costly file handle lookups (which have to go through
|
||||||
|
* the \c get_file_handle script-layer event). An empty string
|
||||||
|
* indicates the associated file is not going to be analyzed further.
|
||||||
*/
|
*/
|
||||||
void SetSize(uint64 size, analyzer::Tag tag, Connection* conn,
|
std::string SetSize(uint64 size, analyzer::Tag tag, Connection* conn,
|
||||||
bool is_orig);
|
bool is_orig, const std::string& precomputed_file_id = "");
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Starts ignoring a file, which will finally be removed from internal
|
* Starts ignoring a file, which will finally be removed from internal
|
||||||
|
@ -283,8 +314,10 @@ protected:
|
||||||
* @param conn network connection over which the file is transferred.
|
* @param conn network connection over which the file is transferred.
|
||||||
* @param is_orig true if the file is being sent from connection originator
|
* @param is_orig true if the file is being sent from connection originator
|
||||||
* or false if it is being sent in the opposite direction.
|
* or false if it is being sent in the opposite direction.
|
||||||
|
* @return #current_file_id, which is a hash of a unique file handle string
|
||||||
|
* set by a \c get_file_handle event handler.
|
||||||
*/
|
*/
|
||||||
void GetFileHandle(analyzer::Tag tag, Connection* c, bool is_orig);
|
std::string GetFileID(analyzer::Tag tag, Connection* c, bool is_orig);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Check if analysis is available for files transferred over a given
|
* Check if analysis is available for files transferred over a given
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue