Allow file analyzers to directly pass the mime type.

This makes it much easier for protocols where the mime type is known in
advance, such as TLS. We no longer have to perform deep
script-level magic.
This commit is contained in:
Johanna Amann 2017-02-10 12:34:23 -08:00
parent 1b19ab78b6
commit 9fd7816501
12 changed files with 89 additions and 26 deletions

View file

@ -38,6 +38,15 @@ export {
# Handler for bro_init: creates the X509 log stream and registers the
# X509, MD5 and SHA1 file analyzers for both certificate MIME types
# ("application/x-x509-user-cert" and "application/x-x509-ca-cert").
event bro_init() &priority=5
{
# Log stream for X509::LOG, using the Info columns and the "x509" path.
Log::create_stream(X509::LOG, [$columns=Info, $ev=log_x509, $path="x509"]);
# Attach the X509 analyzer to files carrying either certificate MIME type.
Files::register_for_mime_type(Files::ANALYZER_X509, "application/x-x509-user-cert");
Files::register_for_mime_type(Files::ANALYZER_X509, "application/x-x509-ca-cert");
# Always calculate hashes. They are not necessary for base scripts
# but very useful for identification, and required for policy scripts.
Files::register_for_mime_type(Files::ANALYZER_MD5, "application/x-x509-user-cert");
Files::register_for_mime_type(Files::ANALYZER_MD5, "application/x-x509-ca-cert");
Files::register_for_mime_type(Files::ANALYZER_SHA1, "application/x-x509-user-cert");
Files::register_for_mime_type(Files::ANALYZER_SHA1, "application/x-x509-ca-cert");
}
redef record Files::Info += {

View file

@ -446,6 +446,9 @@ type fa_metadata: record {
mime_type: string &optional;
## All matching mime types if any were discovered.
mime_types: mime_matches &optional;
## Specifies if the mime type was inferred using signatures,
## or provided by the protocol the file appeared in.
inferred: bool &default=T;
};
## Fields of a SYN packet.

View file

@ -101,12 +101,6 @@ event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priori
}
c$krb = info;
Files::add_analyzer(f, Files::ANALYZER_X509);
# Always calculate hashes. They are not necessary for base scripts
# but very useful for identification, and required for policy scripts
Files::add_analyzer(f, Files::ANALYZER_MD5);
Files::add_analyzer(f, Files::ANALYZER_SHA1);
}
function fill_in_subjects(c: connection)

View file

@ -236,10 +236,6 @@ event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priori
{
# Count up X509 certs.
++c$rdp$cert_count;
Files::add_analyzer(f, Files::ANALYZER_X509);
Files::add_analyzer(f, Files::ANALYZER_MD5);
Files::add_analyzer(f, Files::ANALYZER_SHA1);
}
}

View file

@ -114,12 +114,6 @@ event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priori
c$ssl$cert_chain[|c$ssl$cert_chain|] = f$info;
c$ssl$cert_chain_fuids[|c$ssl$cert_chain_fuids|] = f$id;
}
Files::add_analyzer(f, Files::ANALYZER_X509);
# Always calculate hashes. They are not necessary for base scripts
# but very useful for identification, and required for policy scripts.
Files::add_analyzer(f, Files::ANALYZER_MD5);
Files::add_analyzer(f, Files::ANALYZER_SHA1);
}
event ssl_established(c: connection) &priority=6

View file

@ -76,7 +76,7 @@ VectorVal* proc_padata(const KRB_PA_Data_Sequence* data, const BroAnalyzer bro_a
file_mgr->DataIn(reinterpret_cast<const u_char*>(cert.data()),
cert.length(), bro_analyzer->GetAnalyzerTag(),
bro_analyzer->Conn(), true, file_id);
bro_analyzer->Conn(), true, file_id, "application/x-x509-user-cert");
file_mgr->EndOfFile(file_id);
break;
@ -100,7 +100,7 @@ VectorVal* proc_padata(const KRB_PA_Data_Sequence* data, const BroAnalyzer bro_a
file_mgr->DataIn(reinterpret_cast<const u_char*>(cert.data()),
cert.length(), bro_analyzer->GetAnalyzerTag(),
bro_analyzer->Conn(), false, file_id);
bro_analyzer->Conn(), true, file_id, "application/x-x509-user-cert");
file_mgr->EndOfFile(file_id);
break;

View file

@ -142,7 +142,7 @@ refine flow RDP_Flow += {
connection()->bro_analyzer()->GetAnalyzerTag(),
connection()->bro_analyzer()->Conn(),
false, // It seems there are only server certs?
file_id);
file_id, "application/x-x509-user-cert");
file_mgr->EndOfFile(file_id);
return true;

View file

@ -9,6 +9,9 @@
common.AddRaw(is_orig ? "T" : "F", 1);
bro_analyzer()->Conn()->IDString(&common);
string user_mime = "application/x-x509-user-cert";
string ca_mime = "application/x-x509-ca-cert";
for ( unsigned int i = 0; i < certificates->size(); ++i )
{
const bytestring& cert = (*certificates)[i];
@ -21,7 +24,7 @@
file_mgr->DataIn(reinterpret_cast<const u_char*>(cert.data()),
cert.length(), bro_analyzer()->GetAnalyzerTag(),
bro_analyzer()->Conn(), is_orig, file_id);
bro_analyzer()->Conn(), is_orig, file_id, i == 0 ? user_mime : ca_mime);
file_mgr->EndOfFile(file_id);
}
return true;

View file

@ -55,6 +55,7 @@ int File::bof_buffer_size_idx = -1;
int File::bof_buffer_idx = -1;
int File::meta_mime_type_idx = -1;
int File::meta_mime_types_idx = -1;
int File::meta_inferred_idx = -1;
void File::StaticInit()
{
@ -76,6 +77,7 @@ void File::StaticInit()
bof_buffer_idx = Idx("bof_buffer", fa_file_type);
meta_mime_type_idx = Idx("mime_type", fa_metadata_type);
meta_mime_types_idx = Idx("mime_types", fa_metadata_type);
meta_inferred_idx = Idx("inferred", fa_metadata_type);
}
File::File(const string& file_id, const string& source_name, Connection* conn,
@ -290,6 +292,27 @@ void File::SetReassemblyBuffer(uint64 max)
reassembly_max_buffer = max;
}
// Sets a protocol-provided MIME type for this file and announces it through
// the file_sniff event, with the metadata record's "inferred" field set to
// false.
//
// @param mime_type the MIME type to set.
// @return true only if file_sniff was raised with the given type. Returns
// false if mime_type is empty, if beginning-of-file data has already been
// buffered, if the metadata was already set or inferred (including by an
// earlier call to this function), or if file_sniff is not available.
bool File::SetMime(const string& mime_type)
	{
	// The did_metadata_inference check makes this a set-once operation.
	// Manager::DataIn calls SetMime for every chunk that carries a MIME type,
	// and bof_buffer.size alone does not change as a result of this function,
	// so without the flag check file_sniff could be raised once per chunk.
	if ( mime_type.empty() || bof_buffer.size != 0 || did_metadata_inference )
		return false;

	did_metadata_inference = true;

	// NOTE(review): presumably marking the BOF buffer as full keeps later
	// data from being buffered for content-based detection -- confirm
	// against the BOF buffering code.
	bof_buffer.full = true;

	// The state above is updated even when nobody handles file_sniff; only
	// the event construction is skipped.
	if ( ! FileEventAvailable(file_sniff) )
		return false;

	RecordVal* meta = new RecordVal(fa_metadata_type);
	meta->Assign(meta_mime_type_idx, new StringVal(mime_type));
	// inferred = F: the type was supplied by the caller, not detected.
	meta->Assign(meta_inferred_idx, new Val(0, TYPE_BOOL));

	val_list* vl = new val_list();
	vl->append(val->Ref());
	vl->append(meta);

	FileEvent(file_sniff, vl);
	return true;
	}
void File::InferMetadata()
{
did_metadata_inference = true;

View file

@ -171,6 +171,25 @@ public:
*/
void FileEvent(EventHandlerPtr h, val_list* vl);
/**
* Sets the MIME type for a file to a specific value.
*
* Setting the MIME type has to be done before the MIME type is
* inferred from the content. After a MIME type has been set once,
* it cannot be changed anymore.
*
* This function should only be called when it does not make sense
* to perform automated MIME type detection. This is, e.g., the case
* in protocols where the file type is fixed by the protocol description,
* such as X.509 certificates in TLS.
*
* @param mime_type mime type to set
* @return true if the mime type was set. False if it could not be set because
* a mime type was already set or inferred.
*/
bool SetMime(const string& mime_type);
protected:
friend class Manager;
friend class FileReassembler;
@ -319,6 +338,7 @@ protected:
static int bof_buffer_idx;
static int mime_type_idx;
static int mime_types_idx;
static int meta_inferred_idx;
static int meta_mime_type_idx;
static int meta_mime_types_idx;

View file

@ -110,7 +110,7 @@ void Manager::SetHandle(const string& handle)
string Manager::DataIn(const u_char* data, uint64 len, uint64 offset,
analyzer::Tag tag, Connection* conn, bool is_orig,
const string& precomputed_id)
const string& precomputed_id, const string& mime_type)
{
string id = precomputed_id.empty() ? GetFileID(tag, conn, is_orig) : precomputed_id;
File* file = GetFile(id, conn, tag, is_orig);
@ -118,6 +118,9 @@ string Manager::DataIn(const u_char* data, uint64 len, uint64 offset,
if ( ! file )
return "";
if ( ! mime_type.empty() )
file->SetMime(mime_type);
file->DataIn(data, len, offset);
if ( file->IsComplete() )
@ -130,7 +133,8 @@ string Manager::DataIn(const u_char* data, uint64 len, uint64 offset,
}
string Manager::DataIn(const u_char* data, uint64 len, analyzer::Tag tag,
Connection* conn, bool is_orig, const string& precomputed_id)
Connection* conn, bool is_orig, const string& precomputed_id,
const string& mime_type)
{
string id = precomputed_id.empty() ? GetFileID(tag, conn, is_orig) : precomputed_id;
// Sequential data input shouldn't be going over multiple conns, so don't
@ -140,6 +144,9 @@ string Manager::DataIn(const u_char* data, uint64 len, analyzer::Tag tag,
if ( ! file )
return "";
if ( ! mime_type.empty() )
file->SetMime(mime_type);
file->DataIn(data, len);
if ( file->IsComplete() )

View file

@ -93,6 +93,12 @@ public:
* or false if is being sent in the opposite direction.
* @param precomputed_file_id may be set to a previous return value in order to
* bypass costly file handle lookups.
* @param mime_type may be set to the mime type of the file, if already known due
* to the protocol. This is, e.g., the case in TLS connections where X.509
* certificates are passed as files; here the type of the file is set by
* the protocol. If this parameter is given, mime type detection will be
* disabled.
* This parameter is only used for the first bit of data for each file.
* @return a unique file ID string which, in certain contexts, may be
* cached and passed back in to a subsequent function call in order
* to avoid costly file handle lookups (which have to go through
@ -101,7 +107,8 @@ public:
*/
std::string DataIn(const u_char* data, uint64 len, uint64 offset,
analyzer::Tag tag, Connection* conn, bool is_orig,
const std::string& precomputed_file_id = "");
const std::string& precomputed_file_id = "",
const std::string& mime_type = "");
/**
* Pass in sequential file data.
@ -113,6 +120,12 @@ public:
* or false if is being sent in the opposite direction.
* @param precomputed_file_id may be set to a previous return value in order to
* bypass costly file handle lookups.
* @param mime_type may be set to the mime type of the file, if already known due
* to the protocol. This is, e.g., the case in TLS connections where X.509
* certificates are passed as files; here the type of the file is set by
* the protocol. If this parameter is given, mime type detection will be
* disabled.
* This parameter is only used for the first bit of data for each file.
* @return a unique file ID string which, in certain contexts, may be
* cached and passed back in to a subsequent function call in order
* to avoid costly file handle lookups (which have to go through
@ -121,7 +134,8 @@ public:
*/
std::string DataIn(const u_char* data, uint64 len, analyzer::Tag tag,
Connection* conn, bool is_orig,
const std::string& precomputed_file_id = "");
const std::string& precomputed_file_id = "",
const std::string& mime_type = "");
/**
* Pass in sequential file data from external source (e.g. input framework).