diff --git a/scripts/base/files/x509/main.zeek b/scripts/base/files/x509/main.zeek index d91bd226e4..7b496f5593 100644 --- a/scripts/base/files/x509/main.zeek +++ b/scripts/base/files/x509/main.zeek @@ -6,7 +6,19 @@ module X509; export { redef enum Log::ID += { LOG }; - option required_encounters : count = 1; + ## How many times a certificate has to be encountered before it is + ## cached. Set to 0 to disable caching of certificates. + option caching_required_encounters : count = 1; + + ## The timespan within which caching_required_encounters has to be reached. + option caching_required_encounters_interval : interval = 1 mins; + + ## After a certificate has not been encountered for this time, it + ## may be evicted from the certificate cache. + option certificate_cache_minimum_eviction_interval : interval = 1 mins; + + ## Maximum number of entries in the certificate cache. + option certificate_cache_max_entries : count = 10000; ## The record type which contains the fields of the X.509 log. type Info: record { @@ -57,12 +69,50 @@ export { # Table tracking potential certificates to cache - indexed by the SHA256 of the # raw on-the-wire representation (DER). -global certificates_encountered: table[string] of count &create_expire=5mins; +global certificates_encountered: table[string] of count &create_expire=caching_required_encounters_interval; # Table caching the output of the X509 analyzer for commonly seen certificates. # This is indexed by SHA256 and contains the Info record of the first certificate # encountered. We use this info record to re-play the events. -global certificate_cache: table[string] of X509::Info &read_expire=1mins; +global certificate_cache: table[string] of X509::Info &read_expire=certificate_cache_minimum_eviction_interval; + +redef record Files::Info += { + ## Information about X509 certificates. This is used to keep + ## certificate information until all events have been received. 
+ x509: X509::Info &optional; +}; + +function x509_certificate_cache_replay(f: fa_file, sha256: string) + { + # We encountered a cached cert. The X509 analyzer will skip it. Let's raise all the events that it typically + # raises ourselves. + + # First - let's check if it already has an x509 record. That would mean that the x509_certificate event + # was already handled for this file - in which case we bail out. + if ( f$info?$x509 ) + return; + + local e = certificate_cache[sha256]; + event x509_certificate(f, e$handle, e$certificate); + for ( i in e$extensions_cache ) + { + local ext = e$extensions_cache[i]; + + if ( ext is X509::Extension ) + event x509_extension(f, (ext as X509::Extension)); + else if ( ext is X509::BasicConstraints ) + event x509_ext_basic_constraints(f, (ext as X509::BasicConstraints)); + else if ( ext is X509::SubjectAlternativeName ) + event x509_ext_subject_alternative_name(f, (ext as X509::SubjectAlternativeName)); + else if ( ext is X509::SctInfo ) + { + local s = ( ext as X509::SctInfo); + event x509_ocsp_ext_signed_certificate_timestamp(f, s$version, s$logid, s$timestamp, s$hash_alg, s$sig_alg, s$signature); + } + else + Reporter::error(fmt("Encountered unknown extension while replaying certificate with fuid %s", f$id)); + } + } event zeek_init() &priority=5 { @@ -92,14 +142,9 @@ event zeek_init() &priority=5 Files::register_for_mime_type(Files::ANALYZER_SHA256, "application/pkix-cert"); x509_set_certificate_cache(certificate_cache); + x509_set_certificate_cache_hit_callback(x509_certificate_cache_replay); } -redef record Files::Info += { - ## Information about X509 certificates. This is used to keep - ## certificate information until all events have been received. 
- x509: X509::Info &optional; -}; - event x509_certificate(f: fa_file, cert_ref: opaque of x509, cert: X509::Certificate) &priority=5 { f$info$x509 = [$ts=f$info$ts, $id=f$id, $certificate=cert, $handle=cert_ref]; @@ -145,7 +190,11 @@ event file_state_remove(f: fa_file) &priority=5 Log::write(LOG, f$info$x509); - if ( f$info?$sha256 && !certificate_cache[f$info$sha256] && certificates_encountered[f$info$sha256] >= required_encounters ) + if ( f$info?$sha256 && f$info$sha256 !in certificate_cache && + caching_required_encounters > 0 && + f$info$sha256 in certificates_encountered && + certificates_encountered[f$info$sha256] >= caching_required_encounters && + |certificate_cache| < certificate_cache_max_entries ) { delete certificates_encountered[f$info$sha256]; certificate_cache[f$info$sha256] = f$info$x509; @@ -158,44 +207,14 @@ event file_hash(f: fa_file, kind: string, hash: string) if ( ! f?$info || "X509" !in f$info$analyzers || kind != "sha256" ) return; - if ( hash in certificate_cache ) - { - print "Processing duplicate ", hash; - # we encountered a cached cert. The X509 analyzer will skip it. Let's raise all the events that it typically - # raises by ourselfes. - - # first - let's checked if it already has an x509 record. That would mean that someone raised the file_hash event - # several times for the certificate - in which case we bail out. 
- if ( f$info?$x509 ) - return; - - local e = certificate_cache[hash]; - event x509_certificate(f, e$handle, e$certificate); - for ( i in e$extensions_cache ) - { - local ext = e$extensions_cache[i]; - - if ( ext is X509::Extension ) - event x509_extension(f, (ext as X509::Extension)); - else if ( ext is X509::BasicConstraints ) - event x509_ext_basic_constraints(f, (ext as X509::BasicConstraints)); - else if ( ext is X509::SubjectAlternativeName ) - event x509_ext_subject_alternative_name(f, (ext as X509::SubjectAlternativeName)); - else if ( ext is X509::SctInfo ) - { - local s = ( ext as X509::SctInfo); - event x509_ocsp_ext_signed_certificate_timestamp(f, s$version, s$logid, s$timestamp, s$hash_alg, s$sig_alg, s$signature); - } - else - Reporter::error(fmt("Encountered unknown extension while replaying certificate with fuid %s", f$id)); - } - } + if ( caching_required_encounters == 0 ) + return; if ( hash !in certificates_encountered ) certificates_encountered[hash] = 0; certificates_encountered[hash] += 1; - if ( certificates_encountered[hash] < required_encounters ) + if ( certificates_encountered[hash] < caching_required_encounters ) return; } diff --git a/scripts/base/init-bare.zeek b/scripts/base/init-bare.zeek index c002907a78..6a54818010 100644 --- a/scripts/base/init-bare.zeek +++ b/scripts/base/init-bare.zeek @@ -512,6 +512,13 @@ type fa_file: record { bof_buffer: string &optional; } &redef; +## A function taking a fa_file and a string. Used by the X509 analyzer as its certificate cache hit callback. +## +## .. todo:: We need this type definition only for declaring builtin functions +## via ``bifcl``. We should extend ``bifcl`` to understand composite types +## directly and then remove this alias. +type string_file_function: function(f: fa_file, str: string); + ## Metadata that's been inferred about a particular file. type fa_metadata: record { ## The strongest matching MIME type if one was discovered. 
diff --git a/src/file_analysis/analyzer/x509/X509.cc b/src/file_analysis/analyzer/x509/X509.cc index 4b288927a9..11b3731f67 100644 --- a/src/file_analysis/analyzer/x509/X509.cc +++ b/src/file_analysis/analyzer/x509/X509.cc @@ -55,9 +55,16 @@ bool file_analysis::X509::EndOfFile() auto index = make_intrusive(cert_sha256); if ( certificate_cache->Lookup(index.get(), false) ) // in this case, the certificate is in the cache and we do not - // do any further processing here + // do any further processing here. However, if there is a callback, we execute it. { - std::cerr << "Skipping " << cert_sha256 << std::endl; + if ( ! cache_hit_callback ) + return false; + // Pass the file and the certificate's SHA256 to the callback. NOTE(review): Call()'s return value is dropped - confirm nothing needs releasing. + + val_list vl(2); + vl.push_back(GetFile()->GetVal()->Ref()); + vl.push_back(new StringVal(cert_sha256)); + cache_hit_callback->Call(&vl); return false; } } diff --git a/src/file_analysis/analyzer/x509/X509.h b/src/file_analysis/analyzer/x509/X509.h index 72fb7702e2..c3dfea9554 100644 --- a/src/file_analysis/analyzer/x509/X509.h +++ b/src/file_analysis/analyzer/x509/X509.h @@ -7,6 +7,7 @@ #include "OpaqueVal.h" #include "X509Common.h" +#include "Func.h" #if ( OPENSSL_VERSION_NUMBER < 0x10002000L ) || defined(LIBRESSL_VERSION_NUMBER) @@ -118,6 +119,12 @@ public: static void SetCertificateCache(IntrusivePtr cache) { certificate_cache = cache; } + /** + * Sets the callback that is invoked when a certificate cache hit is encountered. + */ + static void SetCertificateCacheHitCallback(IntrusivePtr func) + { cache_hit_callback = func; } + protected: X509(RecordVal* args, File* file); @@ -134,6 +141,7 @@ private: /** X509 stores associated with global script-layer values */ inline static std::map x509_stores = std::map(); inline static IntrusivePtr certificate_cache = nullptr; + inline static IntrusivePtr cache_hit_callback = nullptr; }; /** diff --git a/src/file_analysis/analyzer/x509/functions.bif b/src/file_analysis/analyzer/x509/functions.bif index faa99d5afe..89aeedac7a 100644 --- 
a/src/file_analysis/analyzer/x509/functions.bif +++ b/src/file_analysis/analyzer/x509/functions.bif @@ -888,3 +888,10 @@ function x509_set_certificate_cache%(tbl: string_any_table%) : bool return val_mgr->GetBool(1); %} + +function x509_set_certificate_cache_hit_callback%(f: string_file_function%) : bool + %{ + file_analysis::X509::SetCertificateCacheHitCallback({NewRef{}, f->AsFunc()}); + + return val_mgr->GetBool(1); + %}