diff --git a/scripts/base/files/x509/main.zeek b/scripts/base/files/x509/main.zeek
index e674ae8888..d91bd226e4 100644
--- a/scripts/base/files/x509/main.zeek
+++ b/scripts/base/files/x509/main.zeek
@@ -6,6 +6,8 @@ module X509;
 export {
 	redef enum Log::ID += { LOG };
 
+	option required_encounters : count = 1; # Sightings of a certificate (by SHA256) needed before its parsed result is cached.
+
 	## The record type which contains the fields of the X.509 log.
 	type Info: record {
 		## Current timestamp.
@@ -23,12 +25,45 @@ export {
 		san: X509::SubjectAlternativeName &optional &log;
 		## Basic constraints extension of the certificate.
 		basic_constraints: X509::BasicConstraints &optional &log;
+		## All extensions in the order they were raised.
+		## This is used for caching certificates that are commonly
+		## encountered and should not be relied on in user scripts.
+		extensions_cache: vector of any &default=vector();
+	};
+
+	## This record is used to store information about the SCTs that are
+	## encountered in certificates.
+	type SctInfo: record {
+		## The version of the encountered SCT (should always be 0 for v1).
+		version: count;
+		## The ID of the log issuing this SCT.
+		logid: string;
+		## The timestamp at which this SCT was issued measured since the
+		## epoch (January 1, 1970, 00:00), ignoring leap seconds, in
+		## milliseconds. Not converted to a Zeek timestamp because we need
+		## the exact value for validation.
+		timestamp: count;
+		## The hash algorithm used for this SCT.
+		hash_alg: count;
+		## The signature algorithm used for this SCT.
+		sig_alg: count;
+		## The signature of this SCT.
+		signature: string;
 	};
 
 	## Event for accessing logged records.
 	global log_x509: event(rec: Info);
 }
 
+# Table tracking potential certificates to cache - indexed by the SHA256 of the
+# raw on-the-wire representation (DER); the value counts how often it was seen.
+global certificates_encountered: table[string] of count &create_expire=5mins;
+
+# Table caching the output of the X509 analyzer for commonly seen certificates.
+# This is indexed by SHA256 and contains the Info record of the first certificate
+# encountered. We use this info record to re-play the events.
+global certificate_cache: table[string] of X509::Info &read_expire=1mins;
+
 event zeek_init() &priority=5
 	{
 	Log::create_stream(X509::LOG, [$columns=Info, $ev=log_x509, $path="x509"]);
@@ -50,6 +85,13 @@ event zeek_init() &priority=5
 	Files::register_for_mime_type(Files::ANALYZER_SHA1, "application/x-x509-user-cert");
 	Files::register_for_mime_type(Files::ANALYZER_SHA1, "application/x-x509-ca-cert");
 	Files::register_for_mime_type(Files::ANALYZER_SHA1, "application/pkix-cert");
+
+	# SHA256 is used by us to determine which certificates to cache.
+	Files::register_for_mime_type(Files::ANALYZER_SHA256, "application/x-x509-user-cert");
+	Files::register_for_mime_type(Files::ANALYZER_SHA256, "application/x-x509-ca-cert");
+	Files::register_for_mime_type(Files::ANALYZER_SHA256, "application/pkix-cert");
+
+	x509_set_certificate_cache(certificate_cache);
 	}
 
 redef record Files::Info += {
@@ -66,19 +108,34 @@ event x509_certificate(f: fa_file, cert_ref: opaque of x509, cert: X509::Certifi
 event x509_extension(f: fa_file, ext: X509::Extension) &priority=5
 	{
 	if ( f$info?$x509 )
+		{
 		f$info$x509$extensions += ext;
+		f$info$x509$extensions_cache += ext;
+		}
 	}
 
 event x509_ext_basic_constraints(f: fa_file, ext: X509::BasicConstraints) &priority=5
 	{
 	if ( f$info?$x509 )
+		{
 		f$info$x509$basic_constraints = ext;
+		f$info$x509$extensions_cache += ext;
+		}
 	}
 
 event x509_ext_subject_alternative_name(f: fa_file, ext: X509::SubjectAlternativeName) &priority=5
 	{
 	if ( f$info?$x509 )
+		{
 		f$info$x509$san = ext;
+		f$info$x509$extensions_cache += ext;
+		}
+	}
+
+event x509_ocsp_ext_signed_certificate_timestamp(f: fa_file, version: count, logid: string, timestamp: count, hash_algorithm: count, signature_algorithm: count, signature: string) &priority=5
+	{
+	if ( f$info?$x509 )
+		f$info$x509$extensions_cache += SctInfo($version=version, $logid=logid, $timestamp=timestamp, $hash_alg=hash_algorithm, $sig_alg=signature_algorithm, $signature=signature);
 	}
 
 event file_state_remove(f: fa_file) &priority=5
@@ -87,4 +144,62 @@ event file_state_remove(f: fa_file) &priority=5
 		return;
 
 	Log::write(LOG, f$info$x509);
+
+	# Promote the certificate into the cache once it has been seen often
+	# enough. Membership is tested with "in" before indexing: the counter
+	# entry can be missing (it is only created by file_hash and expires),
+	# and indexing a missing table key is a runtime error.
+	if ( f$info?$sha256 && f$info$sha256 !in certificate_cache &&
+	     f$info$sha256 in certificates_encountered &&
+	     certificates_encountered[f$info$sha256] >= required_encounters )
+		{
+		delete certificates_encountered[f$info$sha256];
+		certificate_cache[f$info$sha256] = f$info$x509;
+		}
+
+	}
+
+event file_hash(f: fa_file, kind: string, hash: string)
+	{
+	if ( ! f?$info || "X509" !in f$info$analyzers || kind != "sha256" )
+		return;
+
+	if ( hash in certificate_cache )
+		{
+		# We encountered a cached cert. The X509 analyzer will skip it. Let's raise all the events that it typically
+		# raises by ourselves.
+
+		# First - let's check if it already has an x509 record. That would mean that someone raised the file_hash event
+		# several times for the certificate - in which case we bail out.
+		if ( f$info?$x509 )
+			return;
+
+		local e = certificate_cache[hash];
+		event x509_certificate(f, e$handle, e$certificate);
+		for ( i in e$extensions_cache )
+			{
+			local ext = e$extensions_cache[i];
+
+			if ( ext is X509::Extension )
+				event x509_extension(f, (ext as X509::Extension));
+			else if ( ext is X509::BasicConstraints )
+				event x509_ext_basic_constraints(f, (ext as X509::BasicConstraints));
+			else if ( ext is X509::SubjectAlternativeName )
+				event x509_ext_subject_alternative_name(f, (ext as X509::SubjectAlternativeName));
+			else if ( ext is X509::SctInfo )
+				{
+				local s = ( ext as X509::SctInfo);
+				event x509_ocsp_ext_signed_certificate_timestamp(f, s$version, s$logid, s$timestamp, s$hash_alg, s$sig_alg, s$signature);
+				}
+			else
+				Reporter::error(fmt("Encountered unknown extension while replaying certificate with fuid %s", f$id));
+			}
+		}
+
+	# Count this sighting; file_state_remove compares the counter against
+	# required_encounters to decide when to cache the certificate.
+	if ( hash !in certificates_encountered )
+		certificates_encountered[hash] = 0;
+
+	certificates_encountered[hash] += 1;
 	}
diff --git a/scripts/base/init-bare.zeek b/scripts/base/init-bare.zeek
index 65cbb97abe..c002907a78 100644
--- a/scripts/base/init-bare.zeek
+++ b/scripts/base/init-bare.zeek
@@ -11,6 +11,13 @@
 ##    directly and then remove this alias.
 type string_array: table[count] of string;
 
+## A string-table of any.
+##
+## .. todo:: We need this type definition only for declaring builtin functions
+##    via ``bifcl``. We should extend ``bifcl`` to understand composite types
+##    directly and then remove this alias.
+type string_any_table: table[string] of any;
+
 ## A set of strings.
 ##
 ## .. 
todo:: We need this type definition only for declaring builtin functions
diff --git a/src/file_analysis/analyzer/x509/X509.cc b/src/file_analysis/analyzer/x509/X509.cc
index a96f4e2fff..4b288927a9 100644
--- a/src/file_analysis/analyzer/x509/X509.cc
+++ b/src/file_analysis/analyzer/x509/X509.cc
@@ -19,9 +19,7 @@
 #include 
 #include 
 
-namespace file_analysis {
-std::map X509::x509_stores;
-}
+#include 
 
 using namespace file_analysis;
 
@@ -45,10 +43,24 @@ bool file_analysis::X509::Undelivered(uint64_t offset, uint64_t len)
 
 bool file_analysis::X509::EndOfFile()
 	{
+	const unsigned char* cert_char = reinterpret_cast<const unsigned char*>(cert_data.data());
+	if ( certificate_cache )
+		{
+		// first step - let's see if the certificate has been cached.
+		unsigned char buf[SHA256_DIGEST_LENGTH];
+		auto ctx = hash_init(Hash_SHA256);
+		hash_update(ctx, cert_char, cert_data.size());
+		hash_final(ctx, buf);
+		std::string cert_sha256 = sha256_digest_print(buf);
+		auto index = make_intrusive<StringVal>(cert_sha256);
+		// In this case, the certificate is in the cache and we do not
+		// do any further processing here.
+		if ( certificate_cache->Lookup(index.get(), false) )
+			return false;
+		}
+
 	// ok, now we can try to parse the certificate with openssl. Should
 	// be rather straightforward...
-	const unsigned char* cert_char = reinterpret_cast<const unsigned char*>(cert_data.data());
-
 	::X509* ssl_cert = d2i_X509(NULL, &cert_char, cert_data.size());
 	if ( ! ssl_cert )
 		{
diff --git a/src/file_analysis/analyzer/x509/X509.h b/src/file_analysis/analyzer/x509/X509.h
index 59137f7fd3..72fb7702e2 100644
--- a/src/file_analysis/analyzer/x509/X509.h
+++ b/src/file_analysis/analyzer/x509/X509.h
@@ -112,6 +112,12 @@ public:
 	 */
 	static void FreeRootStore();
 
+	/**
+	 * Sets the table[string] that is used as the certificate cache inside of Zeek.
+	 */
+	static void SetCertificateCache(IntrusivePtr cache)
+		{ certificate_cache = cache; } // replaces whatever cache table was set before
+
 protected:
 	X509(RecordVal* args, File* file);
 
@@ -126,7 +132,8 @@ private:
 	static StringVal* KeyCurve(EVP_PKEY *key);
 	static unsigned int KeyLength(EVP_PKEY *key);
 	/** X509 stores associated with global script-layer values */
-	static std::map x509_stores;
+	inline static std::map x509_stores = std::map();
+	inline static IntrusivePtr certificate_cache = nullptr; // starts out null; assigned by SetCertificateCache()
 };
 
 /**
diff --git a/src/file_analysis/analyzer/x509/functions.bif b/src/file_analysis/analyzer/x509/functions.bif
index 7c53e27bf3..faa99d5afe 100644
--- a/src/file_analysis/analyzer/x509/functions.bif
+++ b/src/file_analysis/analyzer/x509/functions.bif
@@ -881,3 +881,10 @@ function x509_spki_hash%(cert: opaque of x509, hash_alg: count%): string
 
 	return x509_entity_hash(cert_handle, hash_alg, 2);
 	%}
+
+function x509_set_certificate_cache%(tbl: string_any_table%) : bool
+	%{
+	file_analysis::X509::SetCertificateCache({NewRef{}, tbl->AsTableVal()}); // NOTE(review): NewRef presumably takes an extra reference on tbl - confirm against IntrusivePtr
+
+	return val_mgr->GetBool(1); // unconditionally returns true
+	%}