Merge remote-tracking branch 'origin/topic/jsiwek/http-file-id-caching'

* origin/topic/jsiwek/http-file-id-caching:
  Revert use of HTTP file ID caching for gaps range request content.
  Extend file analysis API to allow file ID caching, adapt HTTP to use it.

BIT-1125 #merged
This commit is contained in:
Robin Sommer 2014-01-30 16:11:11 -08:00
commit d4b5da1597
8 changed files with 122 additions and 44 deletions

View file

@ -75,36 +75,47 @@ void Manager::SetHandle(const string& handle)
current_file_id = HashHandle(handle);
}
void Manager::DataIn(const u_char* data, uint64 len, uint64 offset,
analyzer::Tag tag, Connection* conn, bool is_orig)
string Manager::DataIn(const u_char* data, uint64 len, uint64 offset,
analyzer::Tag tag, Connection* conn, bool is_orig,
const string& precomputed_id)
{
GetFileHandle(tag, conn, is_orig);
File* file = GetFile(current_file_id, conn, tag, is_orig);
string id = precomputed_id.empty() ? GetFileID(tag, conn, is_orig) : precomputed_id;
File* file = GetFile(id, conn, tag, is_orig);
if ( ! file )
return;
return "";
file->DataIn(data, len, offset);
if ( file->IsComplete() )
{
RemoveFile(file->GetID());
return "";
}
return id;
}
void Manager::DataIn(const u_char* data, uint64 len, analyzer::Tag tag,
Connection* conn, bool is_orig)
string Manager::DataIn(const u_char* data, uint64 len, analyzer::Tag tag,
Connection* conn, bool is_orig, const string& precomputed_id)
{
GetFileHandle(tag, conn, is_orig);
string id = precomputed_id.empty() ? GetFileID(tag, conn, is_orig) : precomputed_id;
// Sequential data input shouldn't be going over multiple conns, so don't
// do the check to update connection set.
File* file = GetFile(current_file_id, conn, tag, is_orig, false);
File* file = GetFile(id, conn, tag, is_orig, false);
if ( ! file )
return;
return "";
file->DataIn(data, len);
if ( file->IsComplete() )
{
RemoveFile(file->GetID());
return "";
}
return id;
}
void Manager::DataIn(const u_char* data, uint64 len, const string& file_id,
@ -133,8 +144,7 @@ void Manager::EndOfFile(analyzer::Tag tag, Connection* conn)
void Manager::EndOfFile(analyzer::Tag tag, Connection* conn, bool is_orig)
{
// Don't need to create a file if we're just going to remove it right away.
GetFileHandle(tag, conn, is_orig);
RemoveFile(current_file_id);
RemoveFile(GetFileID(tag, conn, is_orig));
}
void Manager::EndOfFile(const string& file_id)
@ -142,31 +152,37 @@ void Manager::EndOfFile(const string& file_id)
RemoveFile(file_id);
}
void Manager::Gap(uint64 offset, uint64 len, analyzer::Tag tag,
Connection* conn, bool is_orig)
string Manager::Gap(uint64 offset, uint64 len, analyzer::Tag tag,
Connection* conn, bool is_orig, const string& precomputed_id)
{
GetFileHandle(tag, conn, is_orig);
File* file = GetFile(current_file_id, conn, tag, is_orig);
string id = precomputed_id.empty() ? GetFileID(tag, conn, is_orig) : precomputed_id;
File* file = GetFile(id, conn, tag, is_orig);
if ( ! file )
return;
return "";
file->Gap(offset, len);
return id;
}
void Manager::SetSize(uint64 size, analyzer::Tag tag, Connection* conn,
bool is_orig)
string Manager::SetSize(uint64 size, analyzer::Tag tag, Connection* conn,
bool is_orig, const string& precomputed_id)
{
GetFileHandle(tag, conn, is_orig);
File* file = GetFile(current_file_id, conn, tag, is_orig);
string id = precomputed_id.empty() ? GetFileID(tag, conn, is_orig) : precomputed_id;
File* file = GetFile(id, conn, tag, is_orig);
if ( ! file )
return;
return "";
file->SetTotalBytes(size);
if ( file->IsComplete() )
{
RemoveFile(file->GetID());
return "";
}
return id;
}
bool Manager::SetTimeoutInterval(const string& file_id, double interval) const
@ -317,15 +333,15 @@ bool Manager::IsIgnored(const string& file_id)
return ignored.find(file_id) != ignored.end();
}
void Manager::GetFileHandle(analyzer::Tag tag, Connection* c, bool is_orig)
string Manager::GetFileID(analyzer::Tag tag, Connection* c, bool is_orig)
{
current_file_id.clear();
if ( IsDisabled(tag) )
return;
return "";
if ( ! get_file_handle )
return;
return "";
EnumVal* tagval = tag.AsEnumVal();
Ref(tagval);
@ -337,6 +353,7 @@ void Manager::GetFileHandle(analyzer::Tag tag, Connection* c, bool is_orig)
mgr.QueueEvent(get_file_handle, vl);
mgr.Drain(); // need file handle immediately so we don't have to buffer data
return current_file_id;
}
bool Manager::IsDisabled(analyzer::Tag tag)

View file

@ -82,9 +82,17 @@ public:
* @param conn network connection over which the file data is transferred.
* @param is_orig true if the file is being sent from connection originator
* or false if is being sent in the opposite direction.
* @param precomputed_file_id may be set to a previous return value in order to
* bypass costly file handle lookups.
* @return a unique file ID string which, in certain contexts, may be
* cached and passed back in to a subsequent function call in order
* to avoid costly file handle lookups (which have to go through
* the \c get_file_handle script-layer event). An empty string
* indicates the associated file is not going to be analyzed further.
*/
void DataIn(const u_char* data, uint64 len, uint64 offset,
analyzer::Tag tag, Connection* conn, bool is_orig);
std::string DataIn(const u_char* data, uint64 len, uint64 offset,
analyzer::Tag tag, Connection* conn, bool is_orig,
const std::string& precomputed_file_id = "");
/**
* Pass in sequential file data.
@ -94,9 +102,17 @@ public:
* @param conn network connection over which the file data is transferred.
* @param is_orig true if the file is being sent from connection originator
* or false if is being sent in the opposite direction.
* @param precomputed_file_id may be set to a previous return value in order to
* bypass costly file handle lookups.
* @return a unique file ID string which, in certain contexts, may be
* cached and passed back in to a subsequent function call in order
* to avoid costly file handle lookups (which have to go through
* the \c get_file_handle script-layer event). An empty string
* indicates the associated file is not going to be analyzed further.
*/
void DataIn(const u_char* data, uint64 len, analyzer::Tag tag,
Connection* conn, bool is_orig);
std::string DataIn(const u_char* data, uint64 len, analyzer::Tag tag,
Connection* conn, bool is_orig,
const std::string& precomputed_file_id = "");
/**
* Pass in sequential file data from external source (e.g. input framework).
@ -140,9 +156,17 @@ public:
* @param conn network connection over which the file data is transferred.
* @param is_orig true if the file is being sent from connection originator
* or false if is being sent in the opposite direction.
* @param precomputed_file_id may be set to a previous return value in order to
* bypass costly file handle lookups.
* @return a unique file ID string which, in certain contexts, may be
* cached and passed back in to a subsequent function call in order
* to avoid costly file handle lookups (which have to go through
* the \c get_file_handle script-layer event). An empty string
* indicates the associated file is not going to be analyzed further.
*/
void Gap(uint64 offset, uint64 len, analyzer::Tag tag, Connection* conn,
bool is_orig);
std::string Gap(uint64 offset, uint64 len, analyzer::Tag tag,
Connection* conn, bool is_orig,
const std::string& precomputed_file_id = "");
/**
* Provide the expected number of bytes that comprise a file.
@ -151,9 +175,16 @@ public:
* @param conn network connection over which the file data is transferred.
* @param is_orig true if the file is being sent from connection originator
* or false if is being sent in the opposite direction.
* @param precomputed_file_id may be set to a previous return value in order to
* bypass costly file handle lookups.
* @return a unique file ID string which, in certain contexts, may be
* cached and passed back in to a subsequent function call in order
* to avoid costly file handle lookups (which have to go through
* the \c get_file_handle script-layer event). An empty string
* indicates the associated file is not going to be analyzed further.
*/
void SetSize(uint64 size, analyzer::Tag tag, Connection* conn,
bool is_orig);
std::string SetSize(uint64 size, analyzer::Tag tag, Connection* conn,
bool is_orig, const std::string& precomputed_file_id = "");
/**
* Starts ignoring a file, which will finally be removed from internal
@ -283,8 +314,10 @@ protected:
* @param conn network connection over which the file is transferred.
* @param is_orig true if the file is being sent from connection originator
* or false if is being sent in the opposite direction.
* @return #current_file_id, which is a hash of a unique file handle string
* set by a \c get_file_handle event handler.
*/
void GetFileHandle(analyzer::Tag tag, Connection* c, bool is_orig);
std::string GetFileID(analyzer::Tag tag, Connection* c, bool is_orig);
/**
* Check if analysis is available for files transferred over a given