From e2fbee905429f909db188d1baa46fc71d9a38d99 Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Thu, 11 Apr 2013 11:24:18 -0500 Subject: [PATCH 01/10] FileAnalysis: add more params to some events. --- scripts/base/init-bare.bro | 5 ++ scripts/base/protocols/http/file-ident.bro | 11 ++-- src/event.bif | 41 +++++++++++++-- src/file_analysis/File.cc | 61 +++++++++++++++++----- src/file_analysis/File.h | 16 ++++++ src/file_analysis/Manager.cc | 14 +---- src/file_analysis/Manager.h | 9 +--- testing/scripts/file-analysis-test.bro | 4 +- 8 files changed, 113 insertions(+), 48 deletions(-) diff --git a/scripts/base/init-bare.bro b/scripts/base/init-bare.bro index 7f4d29d26b..07433512a2 100644 --- a/scripts/base/init-bare.bro +++ b/scripts/base/init-bare.bro @@ -316,7 +316,12 @@ type connection: record { tunnel: EncapsulatingConnVector &optional; }; +## Default amount of time a file can be inactive before the file analysis +## gives up and discards any internal state related to the file. const default_file_timeout_interval: interval = 2 mins &redef; + +## Default amount of bytes that file analysis will buffer before raising +## :bro:see:`file_new`. const default_file_bof_buffer_size: count = 1024 &redef; ## A file that Bro is analyzing. This is Bro's type for describing the basic diff --git a/scripts/base/protocols/http/file-ident.bro b/scripts/base/protocols/http/file-ident.bro index 10ff239aa0..9996a70faa 100644 --- a/scripts/base/protocols/http/file-ident.bro +++ b/scripts/base/protocols/http/file-ident.bro @@ -66,23 +66,18 @@ event file_new(f: fa_file) &priority=5 } } -event file_over_new_connection(f: fa_file) &priority=5 +event file_over_new_connection(f: fa_file, c: connection) &priority=5 { if ( ! f?$source ) return; if ( f$source != "HTTP" ) return; if ( ! f?$mime_type ) return; - if ( ! f?$conns ) return; + if ( ! c?$http ) return; # Spread the mime around (e.g. 
for partial content, file_type event only # happens once for the first connection, but if there's subsequent # connections to transfer the same file, they'll be lacking the mime_type # field if we don't do this). - for ( cid in f$conns ) - { - local c: connection = f$conns[cid]; - if ( ! c?$http ) next; - c$http$mime_type = f$mime_type; - } + c$http$mime_type = f$mime_type; } # Tracks byte-range request / partial content response mime types, indexed diff --git a/src/event.bif b/src/event.bif index 08a2b64a84..763d3f0733 100644 --- a/src/event.bif +++ b/src/event.bif @@ -7000,17 +7000,48 @@ event event_queue_flush_point%(%); ## .. bro:see:: set_file_handle event get_file_handle%(tag: count, c: connection, is_orig: bool%); -# TODO: document +## Indicates that an analysis of a new file has begun. The analysis can be +## augmented at this time via :bro:see:`FileAnalysis::add_action`. +## +## f: The file. +## +## .. bro:see:: file_over_new_connection file_timeout file_gap file_state_remove event file_new%(f: fa_file%); -# TODO: give the new connection -event file_over_new_connection%(f: fa_file%); + +## Indicates that a file has been seen being transferred over a connection +## different from the original. +## +## f: The file. +## +## c: The new connection over which the file is seen being transferred. +## +## .. bro:see:: file_new file_timeout file_gap file_state_remove +event file_over_new_connection%(f: fa_file, c: connection%); + +## Indicates that file analysis has timed out because no activity was seen +## for the file in a while. +## +## f: The file. +## +## .. bro:see:: file_new file_over_new_connection file_gap file_state_remove +## default_file_timeout_interval event file_timeout%(f: fa_file%); -# TODO: give size of gap -event file_gap%(f: fa_file%); + +## Indicates that a chunk of the file is missing. +## +## f: The file. +## +## offset: The byte offset from the start of the file at which the gap begins. +## +## len: The number of missing bytes. +## +## .. 
bro:see:: file_new file_over_new_connection file_timeout file_state_remove +event file_gap%(f: fa_file, offset: count, len: count%); ## This event is generated each time file analysis is ending for a given file. ## ## f: The file. +## .. bro:see:: file_new file_over_new_connection file_timeout file_gap event file_state_remove%(f: fa_file%); ## This event is generated each time file analysis generates a digest of the diff --git a/src/file_analysis/File.cc b/src/file_analysis/File.cc index a4713c32fe..b45af0c281 100644 --- a/src/file_analysis/File.cc +++ b/src/file_analysis/File.cc @@ -145,9 +145,16 @@ void File::UpdateConnectionFields(Connection* conn) Val* idx = get_conn_id_val(conn); if ( ! conns->AsTableVal()->Lookup(idx) ) { - conns->AsTableVal()->Assign(idx, conn->BuildConnVal()); - if ( ! is_first ) - file_mgr->FileEvent(file_over_new_connection, this); + Val* conn_val = conn->BuildConnVal(); + conns->AsTableVal()->Assign(idx, conn_val); + + if ( ! is_first && FileEventAvailable(file_over_new_connection) ) + { + val_list* vl = new val_list(); + vl->append(val->Ref()); + vl->append(conn_val->Ref()); + FileEvent(file_over_new_connection, vl); + } } Unref(idx); @@ -266,8 +273,7 @@ void File::ReplayBOF() DetectTypes(bs->Bytes(), bs->Len()); - file_mgr->FileEvent(file_new, this); - mgr.Drain(); // need immediate feedback about actions to add + FileEvent(file_new); for ( size_t i = 0; i < bof_buffer.chunks.size(); ++i ) DataIn(bof_buffer.chunks[i]->Bytes(), bof_buffer.chunks[i]->Len()); @@ -281,9 +287,7 @@ void File::DataIn(const u_char* data, uint64 len, uint64 offset) { // TODO: this should all really be delayed until we attempt reassembly DetectTypes(data, len); - file_mgr->FileEvent(file_new, this); - mgr.Drain(); // need immediate feedback about actions to add - actions.DrainModifications(); + FileEvent(file_new); first_chunk = false; } @@ -318,9 +322,7 @@ void File::DataIn(const u_char* data, uint64 len) if ( missed_bof ) { DetectTypes(data, len); - 
file_mgr->FileEvent(file_new, this); - mgr.Drain(); // need immediate feedback about actions to add - actions.DrainModifications(); + FileEvent(file_new); missed_bof = false; } @@ -366,7 +368,7 @@ void File::EndOfFile() actions.QueueRemoveAction(act->Args()); } - file_mgr->FileEvent(file_state_remove, this); + FileEvent(file_state_remove); actions.DrainModifications(); } @@ -388,8 +390,41 @@ void File::Gap(uint64 offset, uint64 len) actions.QueueRemoveAction(act->Args()); } - file_mgr->FileEvent(file_gap, this); + if ( FileEventAvailable(file_gap) ) + { + val_list* vl = new val_list(); + vl->append(val->Ref()); + vl->append(new Val(offset, TYPE_COUNT)); + vl->append(new Val(len, TYPE_COUNT)); + FileEvent(file_gap, vl); + } actions.DrainModifications(); IncrementByteCount(len, missing_bytes_idx); } + +bool File::FileEventAvailable(EventHandlerPtr h) + { + return h && ! file_mgr->IsIgnored(unique); + } + +void File::FileEvent(EventHandlerPtr h) + { + if ( ! FileEventAvailable(h) ) return; + + val_list* vl = new val_list(); + vl->append(val->Ref()); + FileEvent(h, vl); + } + +void File::FileEvent(EventHandlerPtr h, val_list* vl) + { + mgr.QueueEvent(h, vl); + + if ( h == file_new || h == file_timeout ) + { + // immediate feedback is required for these events. + mgr.Drain(); + actions.DrainModifications(); + } + } diff --git a/src/file_analysis/File.h b/src/file_analysis/File.h index aaa172b8b2..bfb24a72db 100644 --- a/src/file_analysis/File.h +++ b/src/file_analysis/File.h @@ -106,6 +106,22 @@ public: */ void Gap(uint64 offset, uint64 len); + /** + * @return true if event has a handler and the file isn't ignored. + */ + bool FileEventAvailable(EventHandlerPtr h); + + /** + * Raises an event related to the file's life-cycle, the only parameter + * to that event is the \c fa_file record. + */ + void FileEvent(EventHandlerPtr h); + + /** + * Raises an event related to the file's life-cycle. 
+ */ + void FileEvent(EventHandlerPtr h, val_list* vl); + protected: /** diff --git a/src/file_analysis/Manager.cc b/src/file_analysis/Manager.cc index 1d9849d6b8..0f9a75bb2f 100644 --- a/src/file_analysis/Manager.cc +++ b/src/file_analysis/Manager.cc @@ -147,17 +147,6 @@ void Manager::SetSize(uint64 size, File* file) RemoveFile(file->GetUnique()); } -void Manager::FileEvent(EventHandlerPtr h, File* file) - { - if ( ! h ) return; - if ( IsIgnored(file->GetUnique()) ) return; - - val_list * vl = new val_list(); - vl->append(file->GetVal()->Ref()); - - mgr.QueueEvent(h, vl); - } - bool Manager::PostponeTimeout(const FileID& file_id) const { File* file = Lookup(file_id); @@ -235,8 +224,7 @@ void Manager::Timeout(const FileID& file_id, bool is_terminating) file->postpone_timeout = false; - FileEvent(file_timeout, file); - mgr.Drain(); // need immediate feedback about whether to postpone + file->FileEvent(file_timeout); if ( file->postpone_timeout && ! is_terminating ) { diff --git a/src/file_analysis/Manager.h b/src/file_analysis/Manager.h index 0fc6384e40..f01b6c8503 100644 --- a/src/file_analysis/Manager.h +++ b/src/file_analysis/Manager.h @@ -111,9 +111,9 @@ public: bool RemoveAction(const FileID& file_id, const RecordVal* args) const; /** - * Queues an event related to the file's life-cycle. + * @return whether the file mapped to \a unique is being ignored. */ - void FileEvent(EventHandlerPtr h, File* file); + bool IsIgnored(const string& unique); protected: @@ -149,11 +149,6 @@ protected: */ bool RemoveFile(const string& unique); - /** - * @return whether the file mapped to \a unique is being ignored. - */ - bool IsIgnored(const string& unique); - /** * Sets #current_handle to a unique file handle string based on what the * \c get_file_handle event derives from the connection params. 
The diff --git a/testing/scripts/file-analysis-test.bro b/testing/scripts/file-analysis-test.bro index 0af335e75c..3d7e86a34e 100644 --- a/testing/scripts/file-analysis-test.bro +++ b/testing/scripts/file-analysis-test.bro @@ -62,7 +62,7 @@ event file_new(f: fa_file) } } -event file_over_new_connection(f: fa_file) +event file_over_new_connection(f: fa_file, c: connection) { print "FILE_OVER_NEW_CONNECTION"; } @@ -72,7 +72,7 @@ event file_timeout(f: fa_file) print "FILE_TIMEOUT"; } -event file_gap(f: fa_file) +event file_gap(f: fa_file, offset: count, len: count) { print "FILE_GAP"; } From 2fba37e2779cdc545e75ceb9a47dc10e7ecd326e Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Thu, 11 Apr 2013 12:08:46 -0500 Subject: [PATCH 02/10] FileAnalysis: add bif for setting timeout interval --- .../base/frameworks/file-analysis/main.bro | 20 ++++++++++++++++++- src/event.bif | 3 ++- src/file_analysis.bif | 9 +++++++++ src/file_analysis/File.cc | 5 +++++ src/file_analysis/File.h | 5 +++++ src/file_analysis/Manager.cc | 10 ++++++++++ src/file_analysis/Manager.h | 5 +++++ 7 files changed, 55 insertions(+), 2 deletions(-) diff --git a/scripts/base/frameworks/file-analysis/main.bro b/scripts/base/frameworks/file-analysis/main.bro index 6ca9b52087..dbfc95ac31 100644 --- a/scripts/base/frameworks/file-analysis/main.bro +++ b/scripts/base/frameworks/file-analysis/main.bro @@ -120,10 +120,23 @@ export { ## generate two handles that would hash to the same file id. const salt = "I recommend changing this." &redef; + ## Sets the *timeout_interval* field of :bro:see:`fa_file`, which is + ## used to determine the length of inactivity that is allowed for a file + ## before internal state related to it is cleaned up. + ## + ## f: the file. + ## + ## t: the amount of time the file can remain inactive before discarding. + ## + ## Returns: true if the timeout interval was set, or false if analysis + ## for the *id* isn't currently active. 
+ global set_timeout_interval: function(f: fa_file, t: interval): bool; + ## Postpones the timeout of file analysis for a given file. ## When used within a :bro:see:`file_timeout` handler for, the analysis ## the analysis will delay timing out for the period of time indicated by - ## the *timeout_interval* field of :bro:see:`fa_file`. + ## the *timeout_interval* field of :bro:see:`fa_file`, which can be set + ## with :bro:see:`FileAnalysis::set_timeout_interval`. ## ## f: the file. ## @@ -243,6 +256,11 @@ function set_info(f: fa_file) add f$info$conn_uids[f$conns[cid]$uid]; } +function set_timeout_interval(f: fa_file, t: interval): bool + { + return __set_timeout_interval(f$id, t); + } + function postpone_timeout(f: fa_file): bool { return __postpone_timeout(f$id); diff --git a/src/event.bif b/src/event.bif index 763d3f0733..dc5dda51bb 100644 --- a/src/event.bif +++ b/src/event.bif @@ -7024,7 +7024,8 @@ event file_over_new_connection%(f: fa_file, c: connection%); ## f: The file. ## ## .. bro:see:: file_new file_over_new_connection file_gap file_state_remove -## default_file_timeout_interval +## default_file_timeout_interval FileAnalysis::postpone_timeout +## FileAnalysis::set_timeout_interval event file_timeout%(f: fa_file%); ## Indicates that a chunk of the file is missing. diff --git a/src/file_analysis.bif b/src/file_analysis.bif index b3e34f93d2..12b176808a 100644 --- a/src/file_analysis.bif +++ b/src/file_analysis.bif @@ -14,6 +14,15 @@ function FileAnalysis::__postpone_timeout%(file_id: string%): bool return new Val(result, TYPE_BOOL); %} +## :bro:see:`FileAnalysis::set_timeout_interval`. +function FileAnalysis::__set_timeout_interval%(file_id: string, t: interval%): bool + %{ + using file_analysis::FileID; + bool result = file_mgr->SetTimeoutInterval(FileID(file_id->CheckString()), + t); + return new Val(result, TYPE_BOOL); + %} + ## :bro:see:`FileAnalysis::add_action`. 
function FileAnalysis::__add_action%(file_id: string, args: any%): bool %{ diff --git a/src/file_analysis/File.cc b/src/file_analysis/File.cc index b45af0c281..2da64e5c72 100644 --- a/src/file_analysis/File.cc +++ b/src/file_analysis/File.cc @@ -189,6 +189,11 @@ double File::GetTimeoutInterval() const return LookupFieldDefaultInterval(timeout_interval_idx); } +void File::SetTimeoutInterval(double interval) + { + val->Assign(timeout_interval_idx, new Val(interval, TYPE_INTERVAL)); + } + void File::IncrementByteCount(uint64 size, int field_idx) { uint64 old = LookupFieldDefaultCount(field_idx); diff --git a/src/file_analysis/File.h b/src/file_analysis/File.h index bfb24a72db..bede666f13 100644 --- a/src/file_analysis/File.h +++ b/src/file_analysis/File.h @@ -34,6 +34,11 @@ public: */ double GetTimeoutInterval() const; + /** + * Set the "timeout_interval" field from #val record to \a interval seconds. + */ + void SetTimeoutInterval(double interval); + /** * @return value of the "id" field from #val record. */ diff --git a/src/file_analysis/Manager.cc b/src/file_analysis/Manager.cc index 0f9a75bb2f..4f7443d535 100644 --- a/src/file_analysis/Manager.cc +++ b/src/file_analysis/Manager.cc @@ -157,6 +157,16 @@ bool Manager::PostponeTimeout(const FileID& file_id) const return true; } +bool Manager::SetTimeoutInterval(const FileID& file_id, double interval) const + { + File* file = Lookup(file_id); + + if ( ! file ) return false; + + file->SetTimeoutInterval(interval); + return true; + } + bool Manager::AddAction(const FileID& file_id, RecordVal* args) const { File* file = Lookup(file_id); diff --git a/src/file_analysis/Manager.h b/src/file_analysis/Manager.h index f01b6c8503..26d07cd5c4 100644 --- a/src/file_analysis/Manager.h +++ b/src/file_analysis/Manager.h @@ -96,6 +96,11 @@ public: */ bool PostponeTimeout(const FileID& file_id) const; + /** + * Sets an inactivity threshold for the file. 
+ */ + bool SetTimeoutInterval(const FileID& file_id, double interval) const; + /** * Queue attachment of an action to the file identifier. Multiple actions * of a given type can be attached per file identifier at a time as long as From e81f2ae7b0d62c2761983456eac48d8c3fff986b Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Thu, 11 Apr 2013 13:11:46 -0500 Subject: [PATCH 03/10] FileAnalysis: libmagic tweaks. Remove verbose file type detection and automatically strip out charset from mime type. --- .../base/frameworks/file-analysis/main.bro | 6 ---- scripts/base/init-bare.bro | 5 ---- scripts/base/protocols/http/file-ident.bro | 2 +- src/file_analysis/File.cc | 28 +++++++++---------- src/file_analysis/File.h | 10 +++---- .../out | 1 - .../bro..stdout | 2 -- .../get.out | 1 - .../get.out | 1 - .../out | 1 - .../get-gzip.out | 1 - .../get.out | 1 - .../a.out | 1 - .../b.out | 2 -- .../c.out | 1 - .../out | 5 ---- .../out | 2 -- .../bro..stdout | 1 - .../out | 1 - .../file_analysis.log | 10 +++---- .../out | 3 -- testing/scripts/diff-remove-mime-types | 2 +- testing/scripts/file-analysis-test.bro | 10 ++----- 23 files changed, 27 insertions(+), 70 deletions(-) diff --git a/scripts/base/frameworks/file-analysis/main.bro b/scripts/base/frameworks/file-analysis/main.bro index dbfc95ac31..e148248727 100644 --- a/scripts/base/frameworks/file-analysis/main.bro +++ b/scripts/base/frameworks/file-analysis/main.bro @@ -72,11 +72,6 @@ export { ## inspection in *bof_buffer* field. bof_buffer_size: count &log &optional; - ## A file type provided by libmagic against the *bof_buffer*, or - ## in the cases where no buffering of the beginning of file occurs, - ## an initial guess of the file type based on the first data seen. - file_type: string &log &optional; - ## A mime type provided by libmagic against the *bof_buffer*, or ## in the cases where no buffering of the beginning of file occurs, ## an initial guess of the mime type based on the first data seen. 
@@ -249,7 +244,6 @@ function set_info(f: fa_file) f$info$overflow_bytes = f$overflow_bytes; f$info$timeout_interval = f$timeout_interval; f$info$bof_buffer_size = f$bof_buffer_size; - if ( f?$file_type ) f$info$file_type = f$file_type; if ( f?$mime_type ) f$info$mime_type = f$mime_type; if ( f?$conns ) for ( cid in f$conns ) diff --git a/scripts/base/init-bare.bro b/scripts/base/init-bare.bro index 07433512a2..e59fa6fd72 100644 --- a/scripts/base/init-bare.bro +++ b/scripts/base/init-bare.bro @@ -374,11 +374,6 @@ type fa_file: record { ## This is also the buffer that's used for file/mime type detection. bof_buffer: string &optional; - ## A file type provided by libmagic against the *bof_buffer*, or - ## in the cases where no buffering of the beginning of file occurs, - ## an initial guess of the file type based on the first data seen. - file_type: string &optional; - ## A mime type provided by libmagic against the *bof_buffer*, or ## in the cases where no buffering of the beginning of file occurs, ## an initial guess of the mime type based on the first data seen. diff --git a/scripts/base/protocols/http/file-ident.bro b/scripts/base/protocols/http/file-ident.bro index 9996a70faa..7ed4b58a37 100644 --- a/scripts/base/protocols/http/file-ident.bro +++ b/scripts/base/protocols/http/file-ident.bro @@ -49,7 +49,7 @@ event file_new(f: fa_file) &priority=5 c$http$mime_type = f$mime_type; - local mime_str: string = split1(f$mime_type, /;/)[1]; + local mime_str: string = c$http$mime_type; if ( mime_str !in mime_types_extensions ) next; if ( ! 
c$http?$uri ) next; diff --git a/src/file_analysis/File.cc b/src/file_analysis/File.cc index 2da64e5c72..3e7e1d7b64 100644 --- a/src/file_analysis/File.cc +++ b/src/file_analysis/File.cc @@ -45,10 +45,8 @@ int File::overflow_bytes_idx = -1; int File::timeout_interval_idx = -1; int File::bof_buffer_size_idx = -1; int File::bof_buffer_idx = -1; -int File::file_type_idx = -1; int File::mime_type_idx = -1; -magic_t File::magic = 0; magic_t File::magic_mime = 0; string File::salt; @@ -69,10 +67,8 @@ void File::StaticInit() timeout_interval_idx = Idx("timeout_interval"); bof_buffer_size_idx = Idx("bof_buffer_size"); bof_buffer_idx = Idx("bof_buffer"); - file_type_idx = Idx("file_type"); mime_type_idx = Idx("mime_type"); - bro_init_magic(&magic, MAGIC_NONE); bro_init_magic(&magic_mime, MAGIC_MIME); salt = BifConst::FileAnalysis::salt->CheckString(); @@ -247,18 +243,22 @@ bool File::BufferBOF(const u_char* data, uint64 len) return true; } -bool File::DetectTypes(const u_char* data, uint64 len) +bool File::DetectMIME(const u_char* data, uint64 len) { - const char* desc = bro_magic_buffer(magic, data, len); const char* mime = bro_magic_buffer(magic_mime, data, len); - if ( desc ) - val->Assign(file_type_idx, new StringVal(desc)); - if ( mime ) - val->Assign(mime_type_idx, new StringVal(mime)); + { + const char* mime_end = strchr(mime, ';'); - return desc || mime; + if ( mime_end ) + // strip off charset + val->Assign(mime_type_idx, new StringVal(mime_end - mime, mime)); + else + val->Assign(mime_type_idx, new StringVal(mime)); + } + + return mime; } void File::ReplayBOF() @@ -276,7 +276,7 @@ void File::ReplayBOF() BroString* bs = concatenate(bof_buffer.chunks); val->Assign(bof_buffer_idx, new StringVal(bs)); - DetectTypes(bs->Bytes(), bs->Len()); + DetectMIME(bs->Bytes(), bs->Len()); FileEvent(file_new); @@ -291,7 +291,7 @@ void File::DataIn(const u_char* data, uint64 len, uint64 offset) if ( first_chunk ) { // TODO: this should all really be delayed until we attempt 
reassembly - DetectTypes(data, len); + DetectMIME(data, len); FileEvent(file_new); first_chunk = false; } @@ -326,7 +326,7 @@ void File::DataIn(const u_char* data, uint64 len) if ( missed_bof ) { - DetectTypes(data, len); + DetectMIME(data, len); FileEvent(file_new); missed_bof = false; } diff --git a/src/file_analysis/File.h b/src/file_analysis/File.h index bede666f13..2406f4a32a 100644 --- a/src/file_analysis/File.h +++ b/src/file_analysis/File.h @@ -170,11 +170,11 @@ protected: void ReplayBOF(); /** - * Does file/mime type detection and assigns types (if available) to - * corresponding fields in #val. - * @return whether a file or mime type was available. + * Does mime type detection and assigns type (if available) to \c mime_type + * field in #val. + * @return whether mime type was available. */ - bool DetectTypes(const u_char* data, uint64 len); + bool DetectMIME(const u_char* data, uint64 len); FileID id; /**< A pretty hash that likely identifies file */ string unique; /**< A string that uniquely identifies file */ @@ -207,7 +207,6 @@ protected: */ static void StaticInit(); - static magic_t magic; static magic_t magic_mime; static string salt; @@ -224,7 +223,6 @@ protected: static int timeout_interval_idx; static int bof_buffer_size_idx; static int bof_buffer_idx; - static int file_type_idx; static int mime_type_idx; }; diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.actions.data_event/out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.actions.data_event/out index 65744f55d6..c16f2bc1e1 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.actions.data_event/out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.actions.data_event/out @@ -3,7 +3,6 @@ Cx92a0ym5R8, 0, 0 FILE_BOF_BUFFER ^J0.26 | 201 FILE_TYPE -file type is set mime type is set file_stream, Cx92a0ym5R8, 1500, ^J0.26 | 2012-08-24 15:10:04 -0700^J^J * Fixing update-changes, which could pick the wrong control file. 
(Robin Sommer)^J^J * Fixing GPG signing script. (Robin Sommer)^J^J0.25 | 2012-08-01 13:55:46 -0500^J^J * Fix configure script to exit with non-zero status on error (Jon Siwek)^J^J0.24 | 2012-07-05 12:50:43 -0700^J^J * Raise minimum required CMake version to 2.6.3 (Jon Siwek)^J^J * Adding script to delete old fully-merged branches. (Robin Sommer)^J^J0.23-2 | 2012-01-25 13:24:01 -0800^J^J * Fix a bro-cut error message. (Daniel Thayer)^J^J0.23 | 2012-01-11 12:16:11 -0800^J^J * Tweaks to release scripts, plus a new one for signing files.^J (Robin Sommer)^J^J0.22 | 2012-01-10 16:45:19 -0800^J^J * Tweaks for OpenBSD support. (Jon Siwek)^J^J * bro-cut extensions and fixes. (Robin Sommer)^J ^J - If no field names are given on the command line, we now pass through^J all fields. Adresses #657.^J^J - Removing some GNUism from awk script. Addresses #653.^J^J - Added option for time output in UTC. Addresses #668.^J^J - Added output field separator option -F. Addresses #649.^J^J - Fixing option -c: only some header lines were passed through^J rather than all. (Robin Sommer)^J^J * Fix parallel make portability. (Jon Siwek)^J^J0.21-9 | 2011-11-07 05:44:14 -0800^J^J * Fixing compiler warnings. Addresses #388. (Jon Siwek)^J^J0.21-2 | 2011-11-02 18:12:13 -0700^J^J * Fix for misnaming temp file in update-changes script. (Robin Sommer)^J^J0.21-1 | 2011-11-02 18:10:39 -0700^J^J * Little fix for make-relea file_chunk, Cx92a0ym5R8, 1500, 0, ^J0.26 | 2012-08-24 15:10:04 -0700^J^J * Fixing update-changes, which could pick the wrong control file. (Robin Sommer)^J^J * Fixing GPG signing script. (Robin Sommer)^J^J0.25 | 2012-08-01 13:55:46 -0500^J^J * Fix configure script to exit with non-zero status on error (Jon Siwek)^J^J0.24 | 2012-07-05 12:50:43 -0700^J^J * Raise minimum required CMake version to 2.6.3 (Jon Siwek)^J^J * Adding script to delete old fully-merged branches. (Robin Sommer)^J^J0.23-2 | 2012-01-25 13:24:01 -0800^J^J * Fix a bro-cut error message. 
(Daniel Thayer)^J^J0.23 | 2012-01-11 12:16:11 -0800^J^J * Tweaks to release scripts, plus a new one for signing files.^J (Robin Sommer)^J^J0.22 | 2012-01-10 16:45:19 -0800^J^J * Tweaks for OpenBSD support. (Jon Siwek)^J^J * bro-cut extensions and fixes. (Robin Sommer)^J ^J - If no field names are given on the command line, we now pass through^J all fields. Adresses #657.^J^J - Removing some GNUism from awk script. Addresses #653.^J^J - Added option for time output in UTC. Addresses #668.^J^J - Added output field separator option -F. Addresses #649.^J^J - Fixing option -c: only some header lines were passed through^J rather than all. (Robin Sommer)^J^J * Fix parallel make portability. (Jon Siwek)^J^J0.21-9 | 2011-11-07 05:44:14 -0800^J^J * Fixing compiler warnings. Addresses #388. (Jon Siwek)^J^J0.21-2 | 2011-11-02 18:12:13 -0700^J^J * Fix for misnaming temp file in update-changes script. (Robin Sommer)^J^J0.21-1 | 2011-11-02 18:10:39 -0700^J^J * Little fix for make-relea diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.postpone_timeout/bro..stdout b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.postpone_timeout/bro..stdout index 09b82fb655..67a56e0d89 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.postpone_timeout/bro..stdout +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.postpone_timeout/bro..stdout @@ -1,7 +1,6 @@ FILE_NEW oDwT1BbzjM1, 0, 0 FILE_TYPE -file type is set mime type is set FILE_STATE_REMOVE oDwT1BbzjM1, 1022920, 0 @@ -11,7 +10,6 @@ source: HTTP FILE_NEW oDwT1BbzjM1, 0, 0 FILE_TYPE -file type is set mime type is set FILE_TIMEOUT FILE_TIMEOUT diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.remove_action/get.out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.remove_action/get.out index cd7c150023..d3ba15b958 100644 --- 
a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.remove_action/get.out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.remove_action/get.out @@ -3,7 +3,6 @@ Cx92a0ym5R8, 0, 0 FILE_BOF_BUFFER ^J0.26 | 201 FILE_TYPE -file type is set mime type is set FILE_STATE_REMOVE Cx92a0ym5R8, 4705, 0 diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.stop/get.out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.stop/get.out index 0c9b0151cc..3d082df87d 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.stop/get.out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.stop/get.out @@ -3,5 +3,4 @@ Cx92a0ym5R8, 0, 0 FILE_BOF_BUFFER ^J0.26 | 201 FILE_TYPE -file type is set mime type is set diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.ftp/out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.ftp/out index 3bc7a26f4f..a3fa989e49 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.ftp/out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.ftp/out @@ -3,7 +3,6 @@ sidhzrR4IT8, 0, 0 FILE_BOF_BUFFER The Nationa FILE_TYPE -file type is set mime type is set FILE_STATE_REMOVE sidhzrR4IT8, 16557, 0 diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.get/get-gzip.out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.get/get-gzip.out index b01f1fbf30..297edfc767 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.get/get-gzip.out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.get/get-gzip.out @@ -3,7 +3,6 @@ kg59rqyYxN, 0, 0 FILE_BOF_BUFFER {^J "origin FILE_TYPE -file type is set mime type is set FILE_STATE_REMOVE kg59rqyYxN, 197, 0 diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.get/get.out 
b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.get/get.out index 2d2abf89c6..801fd2bd6c 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.get/get.out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.get/get.out @@ -3,7 +3,6 @@ Cx92a0ym5R8, 0, 0 FILE_BOF_BUFFER ^J0.26 | 201 FILE_TYPE -file type is set mime type is set FILE_STATE_REMOVE Cx92a0ym5R8, 4705, 0 diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/a.out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/a.out index 1d8f8ddbce..61c164c81b 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/a.out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/a.out @@ -1,7 +1,6 @@ FILE_NEW 7gZBKVUgy4l, 0, 0 FILE_TYPE -file type is set mime type is set FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/b.out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/b.out index 84c988158d..4d0c0a77ae 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/b.out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/b.out @@ -1,7 +1,6 @@ FILE_NEW oDwT1BbzjM1, 0, 0 FILE_TYPE -file type is set mime type is set FILE_STATE_REMOVE oDwT1BbzjM1, 1022920, 0 @@ -11,7 +10,6 @@ source: HTTP FILE_NEW oDwT1BbzjM1, 0, 0 FILE_TYPE -file type is set mime type is set FILE_TIMEOUT FILE_STATE_REMOVE diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/c.out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/c.out index 53f433ba73..a0d1a21327 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/c.out +++ 
b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/c.out @@ -1,7 +1,6 @@ FILE_NEW uHS14uhRKGe, 0, 0 FILE_TYPE -file type is set mime type is set FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.pipeline/out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.pipeline/out index 28f3a5de04..ba2c318fa1 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.pipeline/out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.pipeline/out @@ -3,7 +3,6 @@ aFQKI8SPOL2, 0, 0 FILE_BOF_BUFFER /*^J******** FILE_TYPE -file type is set mime type is set FILE_STATE_REMOVE aFQKI8SPOL2, 2675, 0 @@ -17,7 +16,6 @@ CCU3vUEr06l, 0, 0 FILE_BOF_BUFFER //-- Google FILE_TYPE -file type is set mime type is set FILE_STATE_REMOVE CCU3vUEr06l, 21421, 0 @@ -31,7 +29,6 @@ HCzA0dVwDPj, 0, 0 FILE_BOF_BUFFER GIF89a^D\0^D\0\xb3 FILE_TYPE -file type is set mime type is set FILE_STATE_REMOVE HCzA0dVwDPj, 94, 0 @@ -46,7 +43,6 @@ a1Zu1fteVEf, 0, 0 FILE_BOF_BUFFER \x89PNG^M^J^Z^J\0\0\0 FILE_TYPE -file type is set mime type is set FILE_STATE_REMOVE a1Zu1fteVEf, 2349, 0 @@ -61,7 +57,6 @@ xXlF7wFdsR, 0, 0 FILE_BOF_BUFFER \x89PNG^M^J^Z^J\0\0\0 FILE_TYPE -file type is set mime type is set FILE_STATE_REMOVE xXlF7wFdsR, 27579, 0 diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.post/out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.post/out index ac249fd253..3f0146eea7 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.post/out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.post/out @@ -3,7 +3,6 @@ v5HLI7MxPQh, 0, 0 FILE_BOF_BUFFER hello world FILE_TYPE -file type is set mime type is set FILE_STATE_REMOVE v5HLI7MxPQh, 11, 0 @@ -18,7 +17,6 @@ PZS1XGHkIf1, 0, 0 FILE_BOF_BUFFER {^J "origin FILE_TYPE -file type is set mime type is set FILE_STATE_REMOVE 
PZS1XGHkIf1, 366, 0 diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.input.basic/bro..stdout b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.input.basic/bro..stdout index 2cae5a3f22..d3845e39db 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.input.basic/bro..stdout +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.input.basic/bro..stdout @@ -3,7 +3,6 @@ nYgPNGLrZf9, 0, 0 FILE_BOF_BUFFER #separator FILE_TYPE -file type is set mime type is set FILE_STATE_REMOVE nYgPNGLrZf9, 311, 0 diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.irc/out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.irc/out index aa6384f82a..ce5fd67778 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.irc/out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.irc/out @@ -3,7 +3,6 @@ wqKMAamJVSb, 0, 0 FILE_BOF_BUFFER PK^C^D^T\0\0\0^H\0\xae FILE_TYPE -file type is set mime type is set FILE_STATE_REMOVE wqKMAamJVSb, 42208, 0 diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.logging/file_analysis.log b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.logging/file_analysis.log index cf0c223a5b..8e04fefa81 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.logging/file_analysis.log +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.logging/file_analysis.log @@ -3,8 +3,8 @@ #empty_field (empty) #unset_field - #path file_analysis -#open 2013-04-04-21-22-26 -#fields id parent_id source last_active seen_bytes total_bytes missing_bytes overflow_bytes timeout_interval bof_buffer_size file_type mime_type timedout conn_uids actions_taken extracted_files md5 sha1 sha256 -#types string string string time count count count count interval count string string bool table[string] table[enum] table[string] string string string -Cx92a0ym5R8 - HTTP 1362692527.009775 4705 4705 0 0 
120.000000 1024 set set F UWkUyAuUGXf FileAnalysis::ACTION_SHA1,FileAnalysis::ACTION_EXTRACT,FileAnalysis::ACTION_DATA_EVENT,FileAnalysis::ACTION_MD5,FileAnalysis::ACTION_SHA256 Cx92a0ym5R8-file 397168fd09991a0e712254df7bc639ac 1dd7ac0398df6cbc0696445a91ec681facf4dc47 4e7c7ef0984119447e743e3ec77e1de52713e345cde03fe7df753a35849bed18 -#close 2013-04-04-21-22-26 +#open 2013-04-11-17-29-51 +#fields id parent_id source last_active seen_bytes total_bytes missing_bytes overflow_bytes timeout_interval bof_buffer_size mime_type timedout conn_uids actions_taken extracted_files md5 sha1 sha256 +#types string string string time count count count count interval count string bool table[string] table[enum] table[string] string string string +Cx92a0ym5R8 - HTTP 1362692527.009775 4705 4705 0 0 120.000000 1024 set F UWkUyAuUGXf FileAnalysis::ACTION_SHA1,FileAnalysis::ACTION_EXTRACT,FileAnalysis::ACTION_DATA_EVENT,FileAnalysis::ACTION_MD5,FileAnalysis::ACTION_SHA256 Cx92a0ym5R8-file 397168fd09991a0e712254df7bc639ac 1dd7ac0398df6cbc0696445a91ec681facf4dc47 4e7c7ef0984119447e743e3ec77e1de52713e345cde03fe7df753a35849bed18 +#close 2013-04-11-17-29-51 diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.smtp/out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.smtp/out index 27e9c42c5b..188b010b35 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.smtp/out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.smtp/out @@ -3,7 +3,6 @@ cwR7l6Zctxb, 0, 0 FILE_BOF_BUFFER Hello^M^J^M^J ^M FILE_TYPE -file type is set mime type is set FILE_STATE_REMOVE cwR7l6Zctxb, 79, 0 @@ -17,7 +16,6 @@ ZAOEQmRyxv1, 0, 0 FILE_BOF_BUFFER Date: Thu, 11 Apr 2013 14:53:54 -0500 Subject: [PATCH 04/10] FileAnalysis: change terminology s/action/analyzer --- .../base/frameworks/file-analysis/main.bro | 49 +++-- scripts/base/init-bare.bro | 3 +- scripts/base/protocols/ftp/file-extract.bro | 8 +- scripts/base/protocols/http/file-extract.bro | 8 +- 
scripts/base/protocols/http/file-hash.bro | 4 +- scripts/base/protocols/irc/dcc-send.bro | 8 +- scripts/base/protocols/smtp/entities.bro | 15 +- .../frameworks/intel/smtp-url-extraction.bro | 4 +- src/CMakeLists.txt | 4 +- src/file_analysis.bif | 20 +- src/file_analysis/Action.h | 101 ---------- src/file_analysis/ActionSet.cc | 185 ----------------- src/file_analysis/ActionSet.h | 109 ---------- src/file_analysis/Analyzer.h | 103 ++++++++++ src/file_analysis/AnalyzerSet.cc | 188 ++++++++++++++++++ src/file_analysis/AnalyzerSet.h | 109 ++++++++++ src/file_analysis/DataEvent.cc | 10 +- src/file_analysis/DataEvent.h | 8 +- src/file_analysis/Extract.cc | 8 +- src/file_analysis/Extract.h | 8 +- src/file_analysis/File.cc | 77 +++---- src/file_analysis/File.h | 26 +-- src/file_analysis/Hash.cc | 2 +- src/file_analysis/Hash.h | 12 +- src/file_analysis/Manager.cc | 10 +- src/file_analysis/Manager.h | 16 +- src/types.bif | 14 +- .../file_analysis.log | 8 +- .../file-analysis/bifs/remove_action.bro | 6 +- testing/scripts/file-analysis-test.bro | 22 +- 30 files changed, 575 insertions(+), 570 deletions(-) delete mode 100644 src/file_analysis/Action.h delete mode 100644 src/file_analysis/ActionSet.cc delete mode 100644 src/file_analysis/ActionSet.h create mode 100644 src/file_analysis/Analyzer.h create mode 100644 src/file_analysis/AnalyzerSet.cc create mode 100644 src/file_analysis/AnalyzerSet.h diff --git a/scripts/base/frameworks/file-analysis/main.bro b/scripts/base/frameworks/file-analysis/main.bro index e148248727..649ab5d43c 100644 --- a/scripts/base/frameworks/file-analysis/main.bro +++ b/scripts/base/frameworks/file-analysis/main.bro @@ -12,13 +12,13 @@ export { LOG }; - ## A structure which represents a desired file analysis action to take. - type ActionArgs: record { - ## The type of action. - act: Action; + ## A structure which represents a desired type of file analysis. + type AnalyzerArgs: record { + ## The type of analysis. 
+ tag: Analyzer; ## The local filename to which to write an extracted file. Must be - ## set when *act* is :bro:see:`FileAnalysis::ACTION_EXTRACT`. + ## set when *tag* is :bro:see:`FileAnalysis::ANALYZER_EXTRACT`. extract_filename: string &optional; ## An event which will be generated for all new file contents, @@ -60,8 +60,7 @@ export { missing_bytes: count &log &default=0; ## The number of not all-in-sequence bytes in the file stream that - ## were delivered to file actions/analyzers due to reassembly buffer - ## overflow. + ## were delivered to file analyzers due to reassembly buffer overflow. overflow_bytes: count &log &default=0; ## The amount of time between receiving new data for this file that @@ -83,10 +82,10 @@ export { ## Connection UIDS over which the file was transferred. conn_uids: set[string] &log; - ## A set of action types taken during the file analysis. - actions_taken: set[Action] &log; + ## A set of analysis types done during the file analysis. + analyzers: set[Analyzer] &log; - ## Local filenames of file extraction actions. + ## Local filenames of extracted files. extracted_files: set[string] &log; ## An MD5 digest of the file contents. @@ -139,26 +138,26 @@ export { ## for the *id* isn't currently active. global postpone_timeout: function(f: fa_file): bool; - ## Adds an action to the analysis of a given file. + ## Adds an analyzer to the analysis of a given file. ## ## f: the file. ## - ## args: the action type to add along with any arguments it takes. + ## args: the analyzer type to add along with any arguments it takes. ## - ## Returns: true if the action will be added, or false if analysis + ## Returns: true if the analyzer will be added, or false if analysis ## for the *id* isn't currently active or the *args* - ## were invalid for the action type. - global add_action: function(f: fa_file, args: ActionArgs): bool; + ## were invalid for the analyzer type. 
+ global add_analyzer: function(f: fa_file, args: AnalyzerArgs): bool; - ## Removes an action from the analysis of a given file. + ## Removes an analyzer from the analysis of a given file. ## ## f: the file. ## - ## args: the action (type and args) to remove. + ## args: the analyzer (type and args) to remove. ## - ## Returns: true if the action will be removed, or false if analysis + ## Returns: true if the analyzer will be removed, or false if analysis ## for the *id* isn't currently active. - global remove_action: function(f: fa_file, args: ActionArgs): bool; + global remove_analyzer: function(f: fa_file, args: AnalyzerArgs): bool; ## Stops/ignores any further analysis of a given file. ## @@ -260,22 +259,22 @@ function postpone_timeout(f: fa_file): bool return __postpone_timeout(f$id); } -function add_action(f: fa_file, args: ActionArgs): bool +function add_analyzer(f: fa_file, args: AnalyzerArgs): bool { - if ( ! __add_action(f$id, args) ) return F; + if ( ! __add_analyzer(f$id, args) ) return F; set_info(f); - add f$info$actions_taken[args$act]; + add f$info$analyzers[args$tag]; - if ( args$act == FileAnalysis::ACTION_EXTRACT ) + if ( args$tag == FileAnalysis::ANALYZER_EXTRACT ) add f$info$extracted_files[args$extract_filename]; return T; } -function remove_action(f: fa_file, args: ActionArgs): bool +function remove_analyzer(f: fa_file, args: AnalyzerArgs): bool { - return __remove_action(f$id, args); + return __remove_analyzer(f$id, args); } function stop(f: fa_file): bool diff --git a/scripts/base/init-bare.bro b/scripts/base/init-bare.bro index e59fa6fd72..8aeeac478c 100644 --- a/scripts/base/init-bare.bro +++ b/scripts/base/init-bare.bro @@ -358,8 +358,7 @@ type fa_file: record { missing_bytes: count &default=0; ## The number of not all-in-sequence bytes in the file stream that - ## were delivered to file actions/analyzers due to reassembly buffer - ## overflow. + ## were delivered to file analyzers due to reassembly buffer overflow. 
overflow_bytes: count &default=0; ## The amount of time between receiving new data for this file that diff --git a/scripts/base/protocols/ftp/file-extract.bro b/scripts/base/protocols/ftp/file-extract.bro index 0f668bf4d0..f14839b616 100644 --- a/scripts/base/protocols/ftp/file-extract.bro +++ b/scripts/base/protocols/ftp/file-extract.bro @@ -38,8 +38,8 @@ event file_new(f: fa_file) &priority=5 if ( f?$mime_type && extract_file_types in f$mime_type ) { - FileAnalysis::add_action(f, [$act=FileAnalysis::ACTION_EXTRACT, - $extract_filename=get_extraction_name(f)]); + FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT, + $extract_filename=get_extraction_name(f)]); return; } @@ -55,8 +55,8 @@ event file_new(f: fa_file) &priority=5 if ( ! s$extract_file ) next; - FileAnalysis::add_action(f, [$act=FileAnalysis::ACTION_EXTRACT, - $extract_filename=get_extraction_name(f)]); + FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT, + $extract_filename=get_extraction_name(f)]); return; } } diff --git a/scripts/base/protocols/http/file-extract.bro b/scripts/base/protocols/http/file-extract.bro index 6e56915051..9c0899b2b6 100644 --- a/scripts/base/protocols/http/file-extract.bro +++ b/scripts/base/protocols/http/file-extract.bro @@ -44,8 +44,8 @@ event file_new(f: fa_file) &priority=5 if ( f?$mime_type && extract_file_types in f$mime_type ) { fname = get_extraction_name(f); - FileAnalysis::add_action(f, [$act=FileAnalysis::ACTION_EXTRACT, - $extract_filename=fname]); + FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT, + $extract_filename=fname]); for ( cid in f$conns ) { @@ -68,8 +68,8 @@ event file_new(f: fa_file) &priority=5 if ( ! 
c$http$extract_file ) next; fname = get_extraction_name(f); - FileAnalysis::add_action(f, [$act=FileAnalysis::ACTION_EXTRACT, - $extract_filename=fname]); + FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT, + $extract_filename=fname]); extracting = T; break; } diff --git a/scripts/base/protocols/http/file-hash.bro b/scripts/base/protocols/http/file-hash.bro index 2b78233e2d..34d91e45bb 100644 --- a/scripts/base/protocols/http/file-hash.bro +++ b/scripts/base/protocols/http/file-hash.bro @@ -30,7 +30,7 @@ event file_new(f: fa_file) &priority=5 if ( f?$mime_type && generate_md5 in f$mime_type ) { - FileAnalysis::add_action(f, [$act=FileAnalysis::ACTION_MD5]); + FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]); return; } @@ -44,7 +44,7 @@ event file_new(f: fa_file) &priority=5 if ( ! c$http$calc_md5 ) next; - FileAnalysis::add_action(f, [$act=FileAnalysis::ACTION_MD5]); + FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]); return; } } diff --git a/scripts/base/protocols/irc/dcc-send.bro b/scripts/base/protocols/irc/dcc-send.bro index 69219349ea..8f3de2ac09 100644 --- a/scripts/base/protocols/irc/dcc-send.bro +++ b/scripts/base/protocols/irc/dcc-send.bro @@ -101,8 +101,8 @@ event file_new(f: fa_file) &priority=5 if ( f?$mime_type && extract_file_types in f$mime_type ) { fname = get_extraction_name(f); - FileAnalysis::add_action(f, [$act=FileAnalysis::ACTION_EXTRACT, - $extract_filename=fname]); + FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT, + $extract_filename=fname]); set_dcc_extraction_file(f, fname); return; } @@ -120,8 +120,8 @@ event file_new(f: fa_file) &priority=5 if ( ! 
s$extract_file ) next; fname = get_extraction_name(f); - FileAnalysis::add_action(f, [$act=FileAnalysis::ACTION_EXTRACT, - $extract_filename=fname]); + FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT, + $extract_filename=fname]); s$extraction_file = fname; return; } diff --git a/scripts/base/protocols/smtp/entities.bro b/scripts/base/protocols/smtp/entities.bro index 9747a56522..19cca30db1 100644 --- a/scripts/base/protocols/smtp/entities.bro +++ b/scripts/base/protocols/smtp/entities.bro @@ -123,8 +123,9 @@ event file_new(f: fa_file) &priority=5 if ( ! extracting ) { fname = get_extraction_name(f); - FileAnalysis::add_action(f, [$act=FileAnalysis::ACTION_EXTRACT, - $extract_filename=fname]); + FileAnalysis::add_analyzer(f, + [$tag=FileAnalysis::ANALYZER_EXTRACT, + $extract_filename=fname]); extracting = T; ++extract_count; } @@ -133,7 +134,7 @@ event file_new(f: fa_file) &priority=5 } if ( c$smtp$current_entity$calc_md5 ) - FileAnalysis::add_action(f, [$act=FileAnalysis::ACTION_MD5]); + FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]); } } @@ -141,12 +142,12 @@ function check_extract_by_type(f: fa_file) { if ( extract_file_types !in f$mime_type ) return; - if ( f?$info && FileAnalysis::ACTION_EXTRACT in f$info$actions_taken ) + if ( f?$info && FileAnalysis::ANALYZER_EXTRACT in f$info$analyzers ) return; local fname: string = get_extraction_name(f); - FileAnalysis::add_action(f, [$act=FileAnalysis::ACTION_EXTRACT, - $extract_filename=fname]); + FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT, + $extract_filename=fname]); if ( ! 
f?$conns ) return; @@ -163,7 +164,7 @@ function check_md5_by_type(f: fa_file) if ( never_calc_md5 ) return; if ( generate_md5 !in f$mime_type ) return; - FileAnalysis::add_action(f, [$act=FileAnalysis::ACTION_MD5]); + FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_MD5]); } event file_new(f: fa_file) &priority=5 diff --git a/scripts/policy/frameworks/intel/smtp-url-extraction.bro b/scripts/policy/frameworks/intel/smtp-url-extraction.bro index 12f40f8d53..2b87f809a6 100644 --- a/scripts/policy/frameworks/intel/smtp-url-extraction.bro +++ b/scripts/policy/frameworks/intel/smtp-url-extraction.bro @@ -26,6 +26,6 @@ event file_new(f: fa_file) &priority=5 if ( ! f?$source ) return; if ( f$source != "SMTP" ) return; - FileAnalysis::add_action(f, [$act=FileAnalysis::ACTION_DATA_EVENT, - $stream_event=intel_mime_data]); + FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_DATA_EVENT, + $stream_event=intel_mime_data]); } diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 28fe7e6bff..fdd5c562e7 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -453,8 +453,8 @@ set(bro_SRCS file_analysis/File.cc file_analysis/FileTimer.cc file_analysis/FileID.h - file_analysis/Action.h - file_analysis/ActionSet.cc + file_analysis/Analyzer.h + file_analysis/AnalyzerSet.cc file_analysis/Extract.cc file_analysis/Hash.cc file_analysis/DataEvent.cc diff --git a/src/file_analysis.bif b/src/file_analysis.bif index 12b176808a..67e692aacf 100644 --- a/src/file_analysis.bif +++ b/src/file_analysis.bif @@ -23,24 +23,24 @@ function FileAnalysis::__set_timeout_interval%(file_id: string, t: interval%): b return new Val(result, TYPE_BOOL); %} -## :bro:see:`FileAnalysis::add_action`. -function FileAnalysis::__add_action%(file_id: string, args: any%): bool +## :bro:see:`FileAnalysis::add_analyzer`. 
+function FileAnalysis::__add_analyzer%(file_id: string, args: any%): bool %{ using file_analysis::FileID; - using BifType::Record::FileAnalysis::ActionArgs; - RecordVal* rv = args->AsRecordVal()->CoerceTo(ActionArgs); - bool result = file_mgr->AddAction(FileID(file_id->CheckString()), rv); + using BifType::Record::FileAnalysis::AnalyzerArgs; + RecordVal* rv = args->AsRecordVal()->CoerceTo(AnalyzerArgs); + bool result = file_mgr->AddAnalyzer(FileID(file_id->CheckString()), rv); Unref(rv); return new Val(result, TYPE_BOOL); %} -## :bro:see:`FileAnalysis::remove_action`. -function FileAnalysis::__remove_action%(file_id: string, args: any%): bool +## :bro:see:`FileAnalysis::remove_analyzer`. +function FileAnalysis::__remove_analyzer%(file_id: string, args: any%): bool %{ using file_analysis::FileID; - using BifType::Record::FileAnalysis::ActionArgs; - RecordVal* rv = args->AsRecordVal()->CoerceTo(ActionArgs); - bool result = file_mgr->RemoveAction(FileID(file_id->CheckString()), rv); + using BifType::Record::FileAnalysis::AnalyzerArgs; + RecordVal* rv = args->AsRecordVal()->CoerceTo(AnalyzerArgs); + bool result = file_mgr->RemoveAnalyzer(FileID(file_id->CheckString()), rv); Unref(rv); return new Val(result, TYPE_BOOL); %} diff --git a/src/file_analysis/Action.h b/src/file_analysis/Action.h deleted file mode 100644 index e8fd30f360..0000000000 --- a/src/file_analysis/Action.h +++ /dev/null @@ -1,101 +0,0 @@ -#ifndef FILE_ANALYSIS_ACTION_H -#define FILE_ANALYSIS_ACTION_H - -#include "Val.h" -#include "NetVar.h" - -namespace file_analysis { - -typedef BifEnum::FileAnalysis::Action ActionTag; - -class File; - -/** - * Base class for actions that can be attached to a file_analysis::File object. - */ -class Action { -public: - - virtual ~Action() - { - DBG_LOG(DBG_FILE_ANALYSIS, "Destroy action %d", tag); - Unref(args); - } - - /** - * Subclasses may override this to receive file data non-sequentially. 
- * @return true if the action is still in a valid state to continue - * receiving data/events or false if it's essentially "done". - */ - virtual bool DeliverChunk(const u_char* data, uint64 len, uint64 offset) - { return true; } - - /** - * Subclasses may override this to receive file sequentially. - * @return true if the action is still in a valid state to continue - * receiving data/events or false if it's essentially "done". - */ - virtual bool DeliverStream(const u_char* data, uint64 len) - { return true; } - - /** - * Subclasses may override this to specifically handle an EOF signal, - * which means no more data is going to be incoming and the action/analyzer - * may be deleted/cleaned up soon. - * @return true if the action is still in a valid state to continue - * receiving data/events or false if it's essentially "done". - */ - virtual bool EndOfFile() - { return true; } - - /** - * Subclasses may override this to handle missing data in a file stream. - * @return true if the action is still in a valid state to continue - * receiving data/events or false if it's essentially "done". - */ - virtual bool Undelivered(uint64 offset, uint64 len) - { return true; } - - /** - * @return the action type enum value. - */ - ActionTag Tag() const { return tag; } - - /** - * @return the ActionArgs associated with the aciton. - */ - RecordVal* Args() const { return args; } - - /** - * @return the file_analysis::File object to which the action is attached. - */ - File* GetFile() const { return file; } - - /** - * @return the action tag equivalent of the 'act' field from the ActionArgs - * value \a args. 
- */ - static ActionTag ArgsTag(const RecordVal* args) - { - using BifType::Record::FileAnalysis::ActionArgs; - return static_cast( - args->Lookup(ActionArgs->FieldOffset("act"))->AsEnum()); - } - -protected: - - Action(RecordVal* arg_args, File* arg_file) - : tag(Action::ArgsTag(arg_args)), args(arg_args->Ref()->AsRecordVal()), - file(arg_file) - {} - - ActionTag tag; - RecordVal* args; - File* file; -}; - -typedef Action* (*ActionInstantiator)(RecordVal* args, File* file); - -} // namespace file_analysis - -#endif diff --git a/src/file_analysis/ActionSet.cc b/src/file_analysis/ActionSet.cc deleted file mode 100644 index 638519b001..0000000000 --- a/src/file_analysis/ActionSet.cc +++ /dev/null @@ -1,185 +0,0 @@ -#include "ActionSet.h" -#include "File.h" -#include "Action.h" -#include "Extract.h" -#include "DataEvent.h" -#include "Hash.h" - -using namespace file_analysis; - -// keep in order w/ declared enum values in file_analysis.bif -static ActionInstantiator action_factory[] = { - file_analysis::Extract::Instantiate, - file_analysis::MD5::Instantiate, - file_analysis::SHA1::Instantiate, - file_analysis::SHA256::Instantiate, - file_analysis::DataEvent::Instantiate, -}; - -static void action_del_func(void* v) - { - delete (Action*) v; - } - -ActionSet::ActionSet(File* arg_file) : file(arg_file) - { - TypeList* t = new TypeList(); - t->Append(BifType::Record::FileAnalysis::ActionArgs->Ref()); - action_hash = new CompositeHash(t); - Unref(t); - action_map.SetDeleteFunc(action_del_func); - } - -ActionSet::~ActionSet() - { - while ( ! 
mod_queue.empty() ) - { - Modification* mod = mod_queue.front(); - mod->Abort(); - delete mod; - mod_queue.pop(); - } - delete action_hash; - } - -bool ActionSet::AddAction(RecordVal* args) - { - HashKey* key = GetKey(args); - - if ( action_map.Lookup(key) ) - { - DBG_LOG(DBG_FILE_ANALYSIS, "Instantiate action %d skipped for file id" - " %s: already exists", Action::ArgsTag(args), - file->GetID().c_str()); - delete key; - return true; - } - - Action* act = InstantiateAction(args); - - if ( ! act ) - { - delete key; - return false; - } - - InsertAction(act, key); - - return true; - } - -bool ActionSet::QueueAddAction(RecordVal* args) - { - HashKey* key = GetKey(args); - Action* act = InstantiateAction(args); - - if ( ! act ) - { - delete key; - return false; - } - - mod_queue.push(new Add(act, key)); - - return true; - } - -bool ActionSet::Add::Perform(ActionSet* set) - { - if ( set->action_map.Lookup(key) ) - { - DBG_LOG(DBG_FILE_ANALYSIS, "Add action %d skipped for file id" - " %s: already exists", act->Tag(), - act->GetFile()->GetID().c_str()); - Abort(); - return true; - } - - set->InsertAction(act, key); - return true; - } - -bool ActionSet::RemoveAction(const RecordVal* args) - { - return RemoveAction(Action::ArgsTag(args), GetKey(args)); - } - -bool ActionSet::RemoveAction(ActionTag tag, HashKey* key) - { - Action* act = (Action*) action_map.Remove(key); - delete key; - - if ( ! 
act ) - { - DBG_LOG(DBG_FILE_ANALYSIS, "Skip remove action %d for file id %s", - tag, file->GetID().c_str()); - return false; - } - - DBG_LOG(DBG_FILE_ANALYSIS, "Remove action %d for file id %s", act->Tag(), - file->GetID().c_str()); - delete act; - return true; - } - -bool ActionSet::QueueRemoveAction(const RecordVal* args) - { - HashKey* key = GetKey(args); - ActionTag tag = Action::ArgsTag(args); - - mod_queue.push(new Remove(tag, key)); - - return action_map.Lookup(key); - } - -bool ActionSet::Remove::Perform(ActionSet* set) - { - return set->RemoveAction(tag, key); - } - -HashKey* ActionSet::GetKey(const RecordVal* args) const - { - HashKey* key = action_hash->ComputeHash(args, 1); - if ( ! key ) - reporter->InternalError("ActionArgs type mismatch"); - return key; - } - -Action* ActionSet::InstantiateAction(RecordVal* args) const - { - Action* act = action_factory[Action::ArgsTag(args)](args, file); - - if ( ! act ) - { - DBG_LOG(DBG_FILE_ANALYSIS, "Instantiate action %d failed for file id", - " %s", Action::ArgsTag(args), file->GetID().c_str()); - return 0; - } - - return act; - } - -void ActionSet::InsertAction(Action* act, HashKey* key) - { - DBG_LOG(DBG_FILE_ANALYSIS, "Add action %d for file id %s", act->Tag(), - file->GetID().c_str()); - action_map.Insert(key, act); - delete key; - } - -void ActionSet::DrainModifications() - { - if ( mod_queue.empty() ) return; - - DBG_LOG(DBG_FILE_ANALYSIS, "Start flushing action mod queue of file id %s", - file->GetID().c_str()); - do - { - Modification* mod = mod_queue.front(); - mod->Perform(this); - delete mod; - mod_queue.pop(); - } while ( ! 
mod_queue.empty() ); - DBG_LOG(DBG_FILE_ANALYSIS, "End flushing action mod queue of file id %s", - file->GetID().c_str()); - } diff --git a/src/file_analysis/ActionSet.h b/src/file_analysis/ActionSet.h deleted file mode 100644 index b65477dbf0..0000000000 --- a/src/file_analysis/ActionSet.h +++ /dev/null @@ -1,109 +0,0 @@ -#ifndef FILE_ANALYSIS_ACTIONSET_H -#define FILE_ANALYSIS_ACTIONSET_H - -#include - -#include "Action.h" -#include "Dict.h" -#include "CompHash.h" -#include "Val.h" - -namespace file_analysis { - -class File; -declare(PDict,Action); - -/** - * A set of file analysis actions indexed by ActionArgs. Allows queueing - * of addition/removals so that those modifications can happen at well-defined - * times (e.g. to make sure a loop iterator isn't invalidated). - */ -class ActionSet { -public: - - ActionSet(File* arg_file); - - ~ActionSet(); - - /** - * @return true if action was instantiated/attached, else false. - */ - bool AddAction(RecordVal* args); - - /** - * @return true if action was able to be instantiated, else false. - */ - bool QueueAddAction(RecordVal* args); - - /** - * @return false if action didn't exist and so wasn't removed, else true. - */ - bool RemoveAction(const RecordVal* args); - - /** - * @return true if action exists at time of call, else false; - */ - bool QueueRemoveAction(const RecordVal* args); - - /** - * Perform all queued modifications to the currently active actions. - */ - void DrainModifications(); - - IterCookie* InitForIteration() const - { return action_map.InitForIteration(); } - - Action* NextEntry(IterCookie* c) - { return action_map.NextEntry(c); } - -protected: - - HashKey* GetKey(const RecordVal* args) const; - Action* InstantiateAction(RecordVal* args) const; - void InsertAction(Action* act, HashKey* key); - bool RemoveAction(ActionTag tag, HashKey* key); - - File* file; - CompositeHash* action_hash; /**< ActionArgs hashes Action map lookup. */ - PDict(Action) action_map; /**< Actions indexed by ActionArgs. 
*/ - - class Modification { - public: - virtual ~Modification() {} - virtual bool Perform(ActionSet* set) = 0; - virtual void Abort() = 0; - }; - - class Add : public Modification { - public: - Add(Action* arg_act, HashKey* arg_key) - : Modification(), act(arg_act), key(arg_key) {} - virtual ~Add() {} - virtual bool Perform(ActionSet* set); - virtual void Abort() { delete act; delete key; } - - protected: - Action* act; - HashKey* key; - }; - - class Remove : public Modification { - public: - Remove(ActionTag arg_tag, HashKey* arg_key) - : Modification(), tag(arg_tag), key(arg_key) {} - virtual ~Remove() {} - virtual bool Perform(ActionSet* set); - virtual void Abort() { delete key; } - - protected: - ActionTag tag; - HashKey* key; - }; - - typedef queue ModQueue; - ModQueue mod_queue; -}; - -} // namespace file_analysiss - -#endif diff --git a/src/file_analysis/Analyzer.h b/src/file_analysis/Analyzer.h new file mode 100644 index 0000000000..77139f5547 --- /dev/null +++ b/src/file_analysis/Analyzer.h @@ -0,0 +1,103 @@ +#ifndef FILE_ANALYSIS_ANALYZER_H +#define FILE_ANALYSIS_ANALYZER_H + +#include "Val.h" +#include "NetVar.h" + +namespace file_analysis { + +typedef BifEnum::FileAnalysis::Analyzer FA_Tag; + +class File; + +/** + * Base class for analyzers that can be attached to file_analysis::File objects. + */ +class Analyzer { +public: + + virtual ~Analyzer() + { + DBG_LOG(DBG_FILE_ANALYSIS, "Destroy file analyzer %d", tag); + Unref(args); + } + + /** + * Subclasses may override this to receive file data non-sequentially. + * @return true if the analyzer is still in a valid state to continue + * receiving data/events or false if it's essentially "done". + */ + virtual bool DeliverChunk(const u_char* data, uint64 len, uint64 offset) + { return true; } + + /** + * Subclasses may override this to receive file sequentially. + * @return true if the analyzer is still in a valid state to continue + * receiving data/events or false if it's essentially "done". 
+ */ + virtual bool DeliverStream(const u_char* data, uint64 len) + { return true; } + + /** + * Subclasses may override this to specifically handle an EOF signal, + * which means no more data is going to be incoming and the analyzer + * may be deleted/cleaned up soon. + * @return true if the analyzer is still in a valid state to continue + * receiving data/events or false if it's essentially "done". + */ + virtual bool EndOfFile() + { return true; } + + /** + * Subclasses may override this to handle missing data in a file stream. + * @return true if the analyzer is still in a valid state to continue + * receiving data/events or false if it's essentially "done". + */ + virtual bool Undelivered(uint64 offset, uint64 len) + { return true; } + + /** + * @return the analyzer type enum value. + */ + FA_Tag Tag() const { return tag; } + + /** + * @return the AnalyzerArgs associated with the analyzer. + */ + RecordVal* Args() const { return args; } + + /** + * @return the file_analysis::File object to which the analyzer is attached. + */ + File* GetFile() const { return file; } + + /** + * @return the analyzer tag equivalent of the 'tag' field from the + * AnalyzerArgs value \a args. 
+ */ + static FA_Tag ArgsTag(const RecordVal* args) + { + using BifType::Record::FileAnalysis::AnalyzerArgs; + return static_cast( + args->Lookup(AnalyzerArgs->FieldOffset("tag"))->AsEnum()); + } + +protected: + + Analyzer(RecordVal* arg_args, File* arg_file) + : tag(file_analysis::Analyzer::ArgsTag(arg_args)), + args(arg_args->Ref()->AsRecordVal()), + file(arg_file) + {} + + FA_Tag tag; + RecordVal* args; + File* file; +}; + +typedef file_analysis::Analyzer* (*AnalyzerInstantiator)(RecordVal* args, + File* file); + +} // namespace file_analysis + +#endif diff --git a/src/file_analysis/AnalyzerSet.cc b/src/file_analysis/AnalyzerSet.cc new file mode 100644 index 0000000000..bdf23c2446 --- /dev/null +++ b/src/file_analysis/AnalyzerSet.cc @@ -0,0 +1,188 @@ +#include "AnalyzerSet.h" +#include "File.h" +#include "Analyzer.h" +#include "Extract.h" +#include "DataEvent.h" +#include "Hash.h" + +using namespace file_analysis; + +// keep in order w/ declared enum values in file_analysis.bif +static AnalyzerInstantiator analyzer_factory[] = { + file_analysis::Extract::Instantiate, + file_analysis::MD5::Instantiate, + file_analysis::SHA1::Instantiate, + file_analysis::SHA256::Instantiate, + file_analysis::DataEvent::Instantiate, +}; + +static void analyzer_del_func(void* v) + { + delete (file_analysis::Analyzer*) v; + } + +AnalyzerSet::AnalyzerSet(File* arg_file) : file(arg_file) + { + TypeList* t = new TypeList(); + t->Append(BifType::Record::FileAnalysis::AnalyzerArgs->Ref()); + analyzer_hash = new CompositeHash(t); + Unref(t); + analyzer_map.SetDeleteFunc(analyzer_del_func); + } + +AnalyzerSet::~AnalyzerSet() + { + while ( ! 
mod_queue.empty() ) + { + Modification* mod = mod_queue.front(); + mod->Abort(); + delete mod; + mod_queue.pop(); + } + delete analyzer_hash; + } + +bool AnalyzerSet::Add(RecordVal* args) + { + HashKey* key = GetKey(args); + + if ( analyzer_map.Lookup(key) ) + { + DBG_LOG(DBG_FILE_ANALYSIS, "Instantiate analyzer %d skipped for file id" + " %s: already exists", file_analysis::Analyzer::ArgsTag(args), + file->GetID().c_str()); + delete key; + return true; + } + + file_analysis::Analyzer* a = InstantiateAnalyzer(args); + + if ( ! a ) + { + delete key; + return false; + } + + Insert(a, key); + + return true; + } + +bool AnalyzerSet::QueueAdd(RecordVal* args) + { + HashKey* key = GetKey(args); + file_analysis::Analyzer* a = InstantiateAnalyzer(args); + + if ( ! a ) + { + delete key; + return false; + } + + mod_queue.push(new AddMod(a, key)); + + return true; + } + +bool AnalyzerSet::AddMod::Perform(AnalyzerSet* set) + { + if ( set->analyzer_map.Lookup(key) ) + { + DBG_LOG(DBG_FILE_ANALYSIS, "Add analyzer %d skipped for file id" + " %s: already exists", a->Tag(), a->GetFile()->GetID().c_str()); + + Abort(); + return true; + } + + set->Insert(a, key); + return true; + } + +bool AnalyzerSet::Remove(const RecordVal* args) + { + return Remove(file_analysis::Analyzer::ArgsTag(args), GetKey(args)); + } + +bool AnalyzerSet::Remove(FA_Tag tag, HashKey* key) + { + file_analysis::Analyzer* a = + (file_analysis::Analyzer*) analyzer_map.Remove(key); + delete key; + + if ( ! 
a ) + { + DBG_LOG(DBG_FILE_ANALYSIS, "Skip remove analyzer %d for file id %s", + tag, file->GetID().c_str()); + return false; + } + + DBG_LOG(DBG_FILE_ANALYSIS, "Remove analyzer %d for file id %s", a->Tag(), + file->GetID().c_str()); + delete a; + return true; + } + +bool AnalyzerSet::QueueRemove(const RecordVal* args) + { + HashKey* key = GetKey(args); + FA_Tag tag = file_analysis::Analyzer::ArgsTag(args); + + mod_queue.push(new RemoveMod(tag, key)); + + return analyzer_map.Lookup(key); + } + +bool AnalyzerSet::RemoveMod::Perform(AnalyzerSet* set) + { + return set->Remove(tag, key); + } + +HashKey* AnalyzerSet::GetKey(const RecordVal* args) const + { + HashKey* key = analyzer_hash->ComputeHash(args, 1); + if ( ! key ) + reporter->InternalError("AnalyzerArgs type mismatch"); + return key; + } + +file_analysis::Analyzer* AnalyzerSet::InstantiateAnalyzer(RecordVal* args) const + { + file_analysis::Analyzer* a = + analyzer_factory[file_analysis::Analyzer::ArgsTag(args)](args, file); + + if ( ! a ) + { + DBG_LOG(DBG_FILE_ANALYSIS, "Instantiate analyzer %d failed for file id" + " %s", file_analysis::Analyzer::ArgsTag(args), + file->GetID().c_str()); + return 0; + } + + return a; + } + +void AnalyzerSet::Insert(file_analysis::Analyzer* a, HashKey* key) + { + DBG_LOG(DBG_FILE_ANALYSIS, "Add analyzer %d for file id %s", a->Tag(), + file->GetID().c_str()); + analyzer_map.Insert(key, a); + delete key; + } + +void AnalyzerSet::DrainModifications() + { + if ( mod_queue.empty() ) return; + + DBG_LOG(DBG_FILE_ANALYSIS, "Start analyzer mod queue flush of file id %s", + file->GetID().c_str()); + do + { + Modification* mod = mod_queue.front(); + mod->Perform(this); + delete mod; + mod_queue.pop(); + } while ( ! 
mod_queue.empty() ); + DBG_LOG(DBG_FILE_ANALYSIS, "End flushing analyzer mod queue of file id %s", + file->GetID().c_str()); + } diff --git a/src/file_analysis/AnalyzerSet.h b/src/file_analysis/AnalyzerSet.h new file mode 100644 index 0000000000..357ca8d9de --- /dev/null +++ b/src/file_analysis/AnalyzerSet.h @@ -0,0 +1,109 @@ +#ifndef FILE_ANALYSIS_ANALYZERSET_H +#define FILE_ANALYSIS_ANALYZERSET_H + +#include <queue> + +#include "Analyzer.h" +#include "Dict.h" +#include "CompHash.h" +#include "Val.h" + +namespace file_analysis { + +class File; +declare(PDict,Analyzer); + +/** + * A set of file analysis analyzers indexed by AnalyzerArgs. Allows queueing + * of addition/removals so that those modifications can happen at well-defined + * times (e.g. to make sure a loop iterator isn't invalidated). + */ +class AnalyzerSet { +public: + + AnalyzerSet(File* arg_file); + + ~AnalyzerSet(); + + /** + * @return true if analyzer was instantiated/attached, else false. + */ + bool Add(RecordVal* args); + + /** + * @return true if analyzer was able to be instantiated, else false. + */ + bool QueueAdd(RecordVal* args); + + /** + * @return false if analyzer didn't exist and so wasn't removed, else true. + */ + bool Remove(const RecordVal* args); + + /** + * @return true if analyzer exists at time of call, else false. + */ + bool QueueRemove(const RecordVal* args); + + /** + * Perform all queued modifications to the currently active analyzers. + */ + void DrainModifications(); + + IterCookie* InitForIteration() const + { return analyzer_map.InitForIteration(); } + + file_analysis::Analyzer* NextEntry(IterCookie* c) + { return analyzer_map.NextEntry(c); } + +protected: + + HashKey* GetKey(const RecordVal* args) const; + file_analysis::Analyzer* InstantiateAnalyzer(RecordVal* args) const; + void Insert(file_analysis::Analyzer* a, HashKey* key); + bool Remove(FA_Tag tag, HashKey* key); + + File* file; + CompositeHash* analyzer_hash; /**< AnalyzerArgs hashes. 
*/ + PDict(file_analysis::Analyzer) analyzer_map; /**< Indexed by AnalyzerArgs. */ + + class Modification { + public: + virtual ~Modification() {} + virtual bool Perform(AnalyzerSet* set) = 0; + virtual void Abort() = 0; + }; + + class AddMod : public Modification { + public: + AddMod(file_analysis::Analyzer* arg_a, HashKey* arg_key) + : Modification(), a(arg_a), key(arg_key) {} + virtual ~AddMod() {} + virtual bool Perform(AnalyzerSet* set); + virtual void Abort() { delete a; delete key; } + + protected: + file_analysis::Analyzer* a; + HashKey* key; + }; + + class RemoveMod : public Modification { + public: + RemoveMod(FA_Tag arg_tag, HashKey* arg_key) + : Modification(), tag(arg_tag), key(arg_key) {} + virtual ~RemoveMod() {} + virtual bool Perform(AnalyzerSet* set); + virtual void Abort() { delete key; } + + protected: + FA_Tag tag; + HashKey* key; + }; + + typedef queue<Modification*> ModQueue; + ModQueue mod_queue; +}; + +} // namespace file_analysis + +#endif diff --git a/src/file_analysis/DataEvent.cc b/src/file_analysis/DataEvent.cc index d8d8c3c680..39652c6a53 100644 --- a/src/file_analysis/DataEvent.cc +++ b/src/file_analysis/DataEvent.cc @@ -9,18 +9,18 @@ using namespace file_analysis; DataEvent::DataEvent(RecordVal* args, File* file, EventHandlerPtr ce, EventHandlerPtr se) - : Action(args, file), chunk_event(ce), stream_event(se) + : file_analysis::Analyzer(args, file), chunk_event(ce), stream_event(se) { } -Action* DataEvent::Instantiate(RecordVal* args, File* file) +file_analysis::Analyzer* DataEvent::Instantiate(RecordVal* args, File* file) { - using BifType::Record::FileAnalysis::ActionArgs; + using BifType::Record::FileAnalysis::AnalyzerArgs; const char* chunk_field = "chunk_event"; const char* stream_field = "stream_event"; - int chunk_off = ActionArgs->FieldOffset(chunk_field); - int stream_off = ActionArgs->FieldOffset(stream_field); + int chunk_off = AnalyzerArgs->FieldOffset(chunk_field); + int stream_off = AnalyzerArgs->FieldOffset(stream_field); Val* 
chunk_val = args->Lookup(chunk_off); Val* stream_val = args->Lookup(stream_off); diff --git a/src/file_analysis/DataEvent.h b/src/file_analysis/DataEvent.h index dea49e1db8..be6f03e178 100644 --- a/src/file_analysis/DataEvent.h +++ b/src/file_analysis/DataEvent.h @@ -5,17 +5,17 @@ #include "Val.h" #include "File.h" -#include "Action.h" +#include "Analyzer.h" namespace file_analysis { /** - * An action to send file data to script-layer events. + * An analyzer to send file data to script-layer events. */ -class DataEvent : public Action { +class DataEvent : public file_analysis::Analyzer { public: - static Action* Instantiate(RecordVal* args, File* file); + static file_analysis::Analyzer* Instantiate(RecordVal* args, File* file); virtual bool DeliverChunk(const u_char* data, uint64 len, uint64 offset); diff --git a/src/file_analysis/Extract.cc b/src/file_analysis/Extract.cc index 7242f54588..860f55bdea 100644 --- a/src/file_analysis/Extract.cc +++ b/src/file_analysis/Extract.cc @@ -6,7 +6,7 @@ using namespace file_analysis; Extract::Extract(RecordVal* args, File* file, const string& arg_filename) - : Action(args, file), filename(arg_filename) + : file_analysis::Analyzer(args, file), filename(arg_filename) { fd = open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0666); @@ -25,11 +25,11 @@ Extract::~Extract() safe_close(fd); } -Action* Extract::Instantiate(RecordVal* args, File* file) +file_analysis::Analyzer* Extract::Instantiate(RecordVal* args, File* file) { - using BifType::Record::FileAnalysis::ActionArgs; + using BifType::Record::FileAnalysis::AnalyzerArgs; const char* field = "extract_filename"; - Val* v = args->Lookup(ActionArgs->FieldOffset(field)); + Val* v = args->Lookup(AnalyzerArgs->FieldOffset(field)); if ( ! 
v ) return 0; diff --git a/src/file_analysis/Extract.h b/src/file_analysis/Extract.h index 0282fac11d..97d2436469 100644 --- a/src/file_analysis/Extract.h +++ b/src/file_analysis/Extract.h @@ -5,17 +5,17 @@ #include "Val.h" #include "File.h" -#include "Action.h" +#include "Analyzer.h" namespace file_analysis { /** - * An action to simply extract files to disk. + * An analyzer to extract files to disk. */ -class Extract : public Action { +class Extract : public file_analysis::Analyzer { public: - static Action* Instantiate(RecordVal* args, File* file); + static file_analysis::Analyzer* Instantiate(RecordVal* args, File* file); virtual ~Extract(); diff --git a/src/file_analysis/File.cc b/src/file_analysis/File.cc index 3e7e1d7b64..ffd281119b 100644 --- a/src/file_analysis/File.cc +++ b/src/file_analysis/File.cc @@ -4,11 +4,12 @@ #include "File.h" #include "FileTimer.h" #include "FileID.h" +#include "Analyzer.h" #include "Manager.h" #include "Reporter.h" #include "Val.h" #include "Type.h" -#include "Analyzer.h" +#include "../Analyzer.h" #include "Event.h" using namespace file_analysis; @@ -77,7 +78,7 @@ void File::StaticInit() File::File(const string& unique, Connection* conn, AnalyzerTag::Tag tag) : id(""), unique(unique), val(0), postpone_timeout(false), first_chunk(true), missed_bof(false), need_reassembly(false), done(false), - actions(this) + analyzers(this) { StaticInit(); @@ -98,7 +99,7 @@ File::File(const string& unique, Connection* conn, AnalyzerTag::Tag tag) if ( conn ) { // add source and connection fields - val->Assign(source_idx, new StringVal(Analyzer::GetTagName(tag))); + val->Assign(source_idx, new StringVal(::Analyzer::GetTagName(tag))); UpdateConnectionFields(conn); } else @@ -215,14 +216,14 @@ void File::ScheduleInactivityTimer() const timer_mgr->Add(new FileTimer(network_time, id, GetTimeoutInterval())); } -bool File::AddAction(RecordVal* args) +bool File::AddAnalyzer(RecordVal* args) { - return done ? 
false : actions.QueueAddAction(args); + return done ? false : analyzers.QueueAdd(args); } -bool File::RemoveAction(const RecordVal* args) +bool File::RemoveAnalyzer(const RecordVal* args) { - return done ? false : actions.QueueRemoveAction(args); + return done ? false : analyzers.QueueRemove(args); } bool File::BufferBOF(const u_char* data, uint64 len) @@ -286,7 +287,7 @@ void File::ReplayBOF() void File::DataIn(const u_char* data, uint64 len, uint64 offset) { - actions.DrainModifications(); + analyzers.DrainModifications(); if ( first_chunk ) { @@ -296,16 +297,16 @@ void File::DataIn(const u_char* data, uint64 len, uint64 offset) first_chunk = false; } - Action* act = 0; - IterCookie* c = actions.InitForIteration(); + file_analysis::Analyzer* a = 0; + IterCookie* c = analyzers.InitForIteration(); - while ( (act = actions.NextEntry(c)) ) + while ( (a = analyzers.NextEntry(c)) ) { - if ( ! act->DeliverChunk(data, len, offset) ) - actions.QueueRemoveAction(act->Args()); + if ( ! a->DeliverChunk(data, len, offset) ) + analyzers.QueueRemove(a->Args()); } - actions.DrainModifications(); + analyzers.DrainModifications(); // TODO: check reassembly requirement based on buffer size in record if ( need_reassembly ) @@ -320,7 +321,7 @@ void File::DataIn(const u_char* data, uint64 len, uint64 offset) void File::DataIn(const u_char* data, uint64 len) { - actions.DrainModifications(); + analyzers.DrainModifications(); if ( BufferBOF(data, len) ) return; @@ -331,25 +332,25 @@ void File::DataIn(const u_char* data, uint64 len) missed_bof = false; } - Action* act = 0; - IterCookie* c = actions.InitForIteration(); + file_analysis::Analyzer* a = 0; + IterCookie* c = analyzers.InitForIteration(); - while ( (act = actions.NextEntry(c)) ) + while ( (a = analyzers.NextEntry(c)) ) { - if ( ! act->DeliverStream(data, len) ) + if ( ! 
a->DeliverStream(data, len) ) { - actions.QueueRemoveAction(act->Args()); + analyzers.QueueRemove(a->Args()); continue; } uint64 offset = LookupFieldDefaultCount(seen_bytes_idx) + LookupFieldDefaultCount(missing_bytes_idx); - if ( ! act->DeliverChunk(data, len, offset) ) - actions.QueueRemoveAction(act->Args()); + if ( ! a->DeliverChunk(data, len, offset) ) + analyzers.QueueRemove(a->Args()); } - actions.DrainModifications(); + analyzers.DrainModifications(); IncrementByteCount(len, seen_bytes_idx); } @@ -357,42 +358,42 @@ void File::EndOfFile() { if ( done ) return; - actions.DrainModifications(); + analyzers.DrainModifications(); // Send along anything that's been buffered, but never flushed. ReplayBOF(); done = true; - Action* act = 0; - IterCookie* c = actions.InitForIteration(); + file_analysis::Analyzer* a = 0; + IterCookie* c = analyzers.InitForIteration(); - while ( (act = actions.NextEntry(c)) ) + while ( (a = analyzers.NextEntry(c)) ) { - if ( ! act->EndOfFile() ) - actions.QueueRemoveAction(act->Args()); + if ( ! a->EndOfFile() ) + analyzers.QueueRemove(a->Args()); } FileEvent(file_state_remove); - actions.DrainModifications(); + analyzers.DrainModifications(); } void File::Gap(uint64 offset, uint64 len) { - actions.DrainModifications(); + analyzers.DrainModifications(); // If we were buffering the beginning of the file, a gap means we've got // as much contiguous stuff at the beginning as possible, so work with that. ReplayBOF(); - Action* act = 0; - IterCookie* c = actions.InitForIteration(); + file_analysis::Analyzer* a = 0; + IterCookie* c = analyzers.InitForIteration(); - while ( (act = actions.NextEntry(c)) ) + while ( (a = analyzers.NextEntry(c)) ) { - if ( ! act->Undelivered(offset, len) ) - actions.QueueRemoveAction(act->Args()); + if ( ! 
a->Undelivered(offset, len) ) + analyzers.QueueRemove(a->Args()); } if ( FileEventAvailable(file_gap) ) @@ -404,7 +405,7 @@ void File::Gap(uint64 offset, uint64 len) FileEvent(file_gap, vl); } - actions.DrainModifications(); + analyzers.DrainModifications(); IncrementByteCount(len, missing_bytes_idx); } @@ -430,6 +431,6 @@ void File::FileEvent(EventHandlerPtr h, val_list* vl) { // immediate feedback is required for these events. mgr.Drain(); - actions.DrainModifications(); + analyzers.DrainModifications(); } } diff --git a/src/file_analysis/File.h b/src/file_analysis/File.h index 2406f4a32a..8705bce60b 100644 --- a/src/file_analysis/File.h +++ b/src/file_analysis/File.h @@ -8,7 +8,7 @@ #include "AnalyzerTags.h" #include "Conn.h" #include "Val.h" -#include "ActionSet.h" +#include "AnalyzerSet.h" #include "FileID.h" #include "BroString.h" @@ -79,35 +79,35 @@ public: void ScheduleInactivityTimer() const; /** - * Queues attaching an action. Only one action per type can be attached at - * a time unless the arguments differ. - * @return false if action can't be instantiated, else true. + * Queues attaching an analyzer. Only one analyzer per type can be attached + * at a time unless the arguments differ. + * @return false if analyzer can't be instantiated, else true. */ - bool AddAction(RecordVal* args); + bool AddAnalyzer(RecordVal* args); /** - * Queues removal of an action. - * @return true if action was active at time of call, else false. + * Queues removal of an analyzer. + * @return true if analyzer was active at time of call, else false. */ - bool RemoveAction(const RecordVal* args); + bool RemoveAnalyzer(const RecordVal* args); /** - * Pass in non-sequential data and deliver to attached actions/analyzers. + * Pass in non-sequential data and deliver to attached analyzers. */ void DataIn(const u_char* data, uint64 len, uint64 offset); /** - * Pass in sequential data and deliver to attached actions/analyzers. 
+ * Pass in sequential data and deliver to attached analyzers. */ void DataIn(const u_char* data, uint64 len); /** - * Inform attached actions/analyzers about end of file being seen. + * Inform attached analyzers about end of file being seen. */ void EndOfFile(); /** - * Inform attached actions/analyzers about a gap in file stream. + * Inform attached analyzers about a gap in file stream. */ void Gap(uint64 offset, uint64 len); @@ -184,7 +184,7 @@ protected: bool missed_bof; /**< Flags that we missed start of file. */ bool need_reassembly; /**< Whether file stream reassembly is needed. */ bool done; /**< If this object is about to be deleted. */ - ActionSet actions; + AnalyzerSet analyzers; struct BOF_Buffer { BOF_Buffer() : full(false), replayed(false), size(0) {} diff --git a/src/file_analysis/Hash.cc b/src/file_analysis/Hash.cc index 320fb9aa02..7b36eb007f 100644 --- a/src/file_analysis/Hash.cc +++ b/src/file_analysis/Hash.cc @@ -7,7 +7,7 @@ using namespace file_analysis; Hash::Hash(RecordVal* args, File* file, HashVal* hv, const char* arg_kind) - : Action(args, file), hash(hv), fed(false), kind(arg_kind) + : file_analysis::Analyzer(args, file), hash(hv), fed(false), kind(arg_kind) { hash->Init(); } diff --git a/src/file_analysis/Hash.h b/src/file_analysis/Hash.h index cffca602ba..2456777281 100644 --- a/src/file_analysis/Hash.h +++ b/src/file_analysis/Hash.h @@ -6,14 +6,14 @@ #include "Val.h" #include "OpaqueVal.h" #include "File.h" -#include "Action.h" +#include "Analyzer.h" namespace file_analysis { /** - * An action to produce a hash of file contents. + * An analyzer to produce a hash of file contents. */ -class Hash : public Action { +class Hash : public file_analysis::Analyzer { public: virtual ~Hash(); @@ -38,7 +38,7 @@ protected: class MD5 : public Hash { public: - static Action* Instantiate(RecordVal* args, File* file) + static file_analysis::Analyzer* Instantiate(RecordVal* args, File* file) { return file_hash ? 
new MD5(args, file) : 0; } protected: @@ -51,7 +51,7 @@ protected: class SHA1 : public Hash { public: - static Action* Instantiate(RecordVal* args, File* file) + static file_analysis::Analyzer* Instantiate(RecordVal* args, File* file) { return file_hash ? new SHA1(args, file) : 0; } protected: @@ -64,7 +64,7 @@ protected: class SHA256 : public Hash { public: - static Action* Instantiate(RecordVal* args, File* file) + static file_analysis::Analyzer* Instantiate(RecordVal* args, File* file) { return file_hash ? new SHA256(args, file) : 0; } protected: diff --git a/src/file_analysis/Manager.cc b/src/file_analysis/Manager.cc index 4f7443d535..31d548f4e4 100644 --- a/src/file_analysis/Manager.cc +++ b/src/file_analysis/Manager.cc @@ -3,7 +3,7 @@ #include "Manager.h" #include "File.h" -#include "Action.h" +#include "Analyzer.h" #include "Var.h" #include "Event.h" @@ -167,22 +167,22 @@ bool Manager::SetTimeoutInterval(const FileID& file_id, double interval) const return true; } -bool Manager::AddAction(const FileID& file_id, RecordVal* args) const +bool Manager::AddAnalyzer(const FileID& file_id, RecordVal* args) const { File* file = Lookup(file_id); if ( ! file ) return false; - return file->AddAction(args); + return file->AddAnalyzer(args); } -bool Manager::RemoveAction(const FileID& file_id, const RecordVal* args) const +bool Manager::RemoveAnalyzer(const FileID& file_id, const RecordVal* args) const { File* file = Lookup(file_id); if ( ! file ) return false; - return file->RemoveAction(args); + return file->RemoveAnalyzer(args); } File* Manager::GetFile(const string& unique, Connection* conn, diff --git a/src/file_analysis/Manager.h b/src/file_analysis/Manager.h index 26d07cd5c4..f22c919736 100644 --- a/src/file_analysis/Manager.h +++ b/src/file_analysis/Manager.h @@ -102,18 +102,18 @@ public: bool SetTimeoutInterval(const FileID& file_id, double interval) const; /** - * Queue attachment of an action to the file identifier. 
Multiple actions - * of a given type can be attached per file identifier at a time as long as - * the arguments differ. - * @return false if the action failed to be instantiated, else true. + * Queue attachment of an analyzer to the file identifier. Multiple + * analyzers of a given type can be attached per file identifier at a time + * as long as the arguments differ. + * @return false if the analyzer failed to be instantiated, else true. */ - bool AddAction(const FileID& file_id, RecordVal* args) const; + bool AddAnalyzer(const FileID& file_id, RecordVal* args) const; /** - * Queue removal of an action for a given file identifier. - * @return true if the action is active at the time of call, else false. + * Queue removal of an analyzer for a given file identifier. + * @return true if the analyzer is active at the time of call, else false. */ - bool RemoveAction(const FileID& file_id, const RecordVal* args) const; + bool RemoveAnalyzer(const FileID& file_id, const RecordVal* args) const; /** * @return whether the file mapped to \a unique is being ignored. diff --git a/src/types.bif b/src/types.bif index b69239487b..954c33ce21 100644 --- a/src/types.bif +++ b/src/types.bif @@ -229,25 +229,25 @@ type gtp_gsn_addr: record; module FileAnalysis; -type ActionArgs: record; +type AnalyzerArgs: record; ## An enumeration of various file analysis actions that can be taken. -enum Action %{ +enum Analyzer %{ ## Extract a file to local filesystem - ACTION_EXTRACT, + ANALYZER_EXTRACT, ## Calculate an MD5 digest of the file's contents. - ACTION_MD5, + ANALYZER_MD5, ## Calculate an SHA1 digest of the file's contents. - ACTION_SHA1, + ANALYZER_SHA1, ## Calculate an SHA256 digest of the file's contents. - ACTION_SHA256, + ANALYZER_SHA256, ## Deliver the file contents to the script-layer in an event. 
- ACTION_DATA_EVENT, + ANALYZER_DATA_EVENT, %} module GLOBAL; diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.logging/file_analysis.log b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.logging/file_analysis.log index 8e04fefa81..c67b9125f5 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.logging/file_analysis.log +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.logging/file_analysis.log @@ -3,8 +3,8 @@ #empty_field (empty) #unset_field - #path file_analysis -#open 2013-04-11-17-29-51 -#fields id parent_id source last_active seen_bytes total_bytes missing_bytes overflow_bytes timeout_interval bof_buffer_size mime_type timedout conn_uids actions_taken extracted_files md5 sha1 sha256 +#open 2013-04-11-19-37-28 +#fields id parent_id source last_active seen_bytes total_bytes missing_bytes overflow_bytes timeout_interval bof_buffer_size mime_type timedout conn_uids analyzers extracted_files md5 sha1 sha256 #types string string string time count count count count interval count string bool table[string] table[enum] table[string] string string string -Cx92a0ym5R8 - HTTP 1362692527.009775 4705 4705 0 0 120.000000 1024 set F UWkUyAuUGXf FileAnalysis::ACTION_SHA1,FileAnalysis::ACTION_EXTRACT,FileAnalysis::ACTION_DATA_EVENT,FileAnalysis::ACTION_MD5,FileAnalysis::ACTION_SHA256 Cx92a0ym5R8-file 397168fd09991a0e712254df7bc639ac 1dd7ac0398df6cbc0696445a91ec681facf4dc47 4e7c7ef0984119447e743e3ec77e1de52713e345cde03fe7df753a35849bed18 -#close 2013-04-11-17-29-51 +Cx92a0ym5R8 - HTTP 1362692527.009775 4705 4705 0 0 120.000000 1024 set F UWkUyAuUGXf FileAnalysis::ANALYZER_SHA1,FileAnalysis::ANALYZER_EXTRACT,FileAnalysis::ANALYZER_DATA_EVENT,FileAnalysis::ANALYZER_MD5,FileAnalysis::ANALYZER_SHA256 Cx92a0ym5R8-file 397168fd09991a0e712254df7bc639ac 1dd7ac0398df6cbc0696445a91ec681facf4dc47 4e7c7ef0984119447e743e3ec77e1de52713e345cde03fe7df753a35849bed18 +#close 2013-04-11-19-37-28 diff --git 
a/testing/btest/scripts/base/frameworks/file-analysis/bifs/remove_action.bro b/testing/btest/scripts/base/frameworks/file-analysis/bifs/remove_action.bro index de3006d1f6..1f15a4221f 100644 --- a/testing/btest/scripts/base/frameworks/file-analysis/bifs/remove_action.bro +++ b/testing/btest/scripts/base/frameworks/file-analysis/bifs/remove_action.bro @@ -10,9 +10,9 @@ redef test_get_file_name = function(f: fa_file): string event file_new(f: fa_file) &priority=-10 { - for ( act in test_file_actions ) - FileAnalysis::remove_action(f, act); + for ( tag in test_file_analyzers ) + FileAnalysis::remove_analyzer(f, tag); local filename = test_get_file_name(f); - FileAnalysis::remove_action(f, [$act=FileAnalysis::ACTION_EXTRACT, + FileAnalysis::remove_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT, $extract_filename=filename]); } diff --git a/testing/scripts/file-analysis-test.bro b/testing/scripts/file-analysis-test.bro index a314568b5f..be8b4eadd6 100644 --- a/testing/scripts/file-analysis-test.bro +++ b/testing/scripts/file-analysis-test.bro @@ -1,7 +1,7 @@ global test_file_analysis_source: string = "" &redef; -global test_file_actions: set[FileAnalysis::ActionArgs]; +global test_file_analyzers: set[FileAnalysis::AnalyzerArgs]; global test_get_file_name: function(f: fa_file): string = function(f: fa_file): string { return ""; } &redef; @@ -29,16 +29,16 @@ event file_new(f: fa_file) if ( test_file_analysis_source == "" || f$source == test_file_analysis_source ) { - for ( act in test_file_actions ) - FileAnalysis::add_action(f, act); + for ( tag in test_file_analyzers ) + FileAnalysis::add_analyzer(f, tag); local filename: string = test_get_file_name(f); if ( filename != "" ) - FileAnalysis::add_action(f, [$act=FileAnalysis::ACTION_EXTRACT, - $extract_filename=filename]); - FileAnalysis::add_action(f, [$act=FileAnalysis::ACTION_DATA_EVENT, - $chunk_event=file_chunk, - $stream_event=file_stream]); + FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_EXTRACT, + 
$extract_filename=filename]); + FileAnalysis::add_analyzer(f, [$tag=FileAnalysis::ANALYZER_DATA_EVENT, + $chunk_event=file_chunk, + $stream_event=file_stream]); } if ( f?$bof_buffer ) @@ -96,7 +96,7 @@ event file_state_remove(f: fa_file) event bro_init() { - add test_file_actions[[$act=FileAnalysis::ACTION_MD5]]; - add test_file_actions[[$act=FileAnalysis::ACTION_SHA1]]; - add test_file_actions[[$act=FileAnalysis::ACTION_SHA256]]; + add test_file_analyzers[[$tag=FileAnalysis::ANALYZER_MD5]]; + add test_file_analyzers[[$tag=FileAnalysis::ANALYZER_SHA1]]; + add test_file_analyzers[[$tag=FileAnalysis::ANALYZER_SHA256]]; } From 037d582b0e8222f64f30e95e0973f9073e6adc9e Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Fri, 12 Apr 2013 11:58:19 -0500 Subject: [PATCH 05/10] FileAnalysis: add custom libmagic database. - It's derived from the magic database of libmagic 5.14, but with most everything not related to mime types removed. - The custom database is always used by default for mime detection, but the more verbose file type detection will fall back on the default libmagic installation's database. The result is: mime type strings are now guaranteed to be consistent across platforms, but the verbose file type descriptions are not. - The custom database gets installed in $prefix/share/bro/magic, and should even be extensible if files with new patterns are added inside the directory. - The search path for the mime magic database can be controlled via BROMAGIC environment variable. - Remove mime_desc field from ftp.log. - Stop using the mime/file type canonifier with unit tests. - libmagic >= 5.04 is now a requirement. 
--- CMakeLists.txt | 16 + cmake | 2 +- magic/COPYING | 29 ++ magic/animation | 208 ++++++++++ magic/archive | 242 ++++++++++++ magic/assembler | 19 + magic/audio | 149 +++++++ magic/c-lang | 47 +++ magic/cafebabe | 31 ++ magic/commands | 82 ++++ magic/compress | 77 ++++ magic/database | 47 +++ magic/diff | 25 ++ magic/elf | 43 ++ magic/epoc | 34 ++ magic/filesystems | 12 + magic/flash | 18 + magic/fonts | 32 ++ magic/fortran | 7 + magic/frame | 31 ++ magic/gimp | 13 + magic/gnu | 23 ++ magic/gnumeric | 8 + magic/icc | 51 +++ magic/iff | 21 + magic/images | 255 ++++++++++++ magic/java | 16 + magic/javascript | 17 + magic/jpeg | 31 ++ magic/kde | 11 + magic/kml | 30 ++ magic/linux | 22 ++ magic/lisp | 42 ++ magic/lua | 17 + magic/m4 | 7 + magic/macintosh | 21 + magic/mail.news | 35 ++ magic/make | 16 + magic/marc21 | 29 ++ magic/matroska | 17 + magic/misctools | 9 + magic/msdos | 368 ++++++++++++++++++ magic/neko | 12 + magic/pascal | 11 + magic/pdf | 8 + magic/perl | 26 ++ magic/pgp | 27 ++ magic/pkgadd | 7 + magic/printer | 14 + magic/python | 46 +++ magic/riff | 36 ++ magic/rpm | 12 + magic/rtf | 9 + magic/ruby | 28 ++ magic/sc | 7 + magic/sgml | 82 ++++ magic/sniffer | 17 + magic/tcl | 23 ++ magic/tex | 56 +++ magic/troff | 22 ++ magic/vorbis | 26 ++ magic/warc | 14 + magic/windows | 19 + magic/wordprocessors | 43 ++ magic/xwindows | 11 + scripts/base/protocols/ftp/main.bro | 4 - src/main.cc | 1 + src/util-config.h.in | 1 + src/util.cc | 22 +- src/util.h | 1 + .../Baseline/core.tunnels.ayiya/http.log | 2 +- .../http.log | 4 +- .../core.tunnels.gtp.outer_ip_frag/http.log | 2 +- .../Baseline/core.tunnels.teredo/http.log | 8 +- .../http.log | 4 +- .../out | 4 +- .../bro..stdout | 8 +- .../get.out | 4 +- .../get.out | 4 +- .../out | 4 +- .../get-gzip.out | 4 +- .../get.out | 4 +- .../a.out | 4 +- .../b.out | 8 +- .../c.out | 4 +- .../out | 20 +- .../out | 8 +- .../bro..stdout | 4 +- .../out | 4 +- .../file_analysis.log | 6 +- .../out | 12 +- .../http.log | 2 +- 
.../ftp.log | 32 +- .../ftp.log | 20 +- .../ftp.log | 22 +- .../http.log | 2 +- .../http.log | 2 +- .../http.log | 92 ++--- .../http.log | 10 +- .../smtp_entities.log | 6 +- .../smtp_entities.log | 6 +- testing/btest/btest.cfg | 1 + testing/external/subdir-btest.cfg | 3 +- testing/scripts/diff-canonifier | 3 +- testing/scripts/diff-canonifier-external | 1 - testing/scripts/file-analysis-test.bro | 6 +- 106 files changed, 2951 insertions(+), 174 deletions(-) create mode 100644 magic/COPYING create mode 100644 magic/animation create mode 100644 magic/archive create mode 100644 magic/assembler create mode 100644 magic/audio create mode 100644 magic/c-lang create mode 100644 magic/cafebabe create mode 100644 magic/commands create mode 100644 magic/compress create mode 100644 magic/database create mode 100644 magic/diff create mode 100644 magic/elf create mode 100644 magic/epoc create mode 100644 magic/filesystems create mode 100644 magic/flash create mode 100644 magic/fonts create mode 100644 magic/fortran create mode 100644 magic/frame create mode 100644 magic/gimp create mode 100644 magic/gnu create mode 100644 magic/gnumeric create mode 100644 magic/icc create mode 100644 magic/iff create mode 100644 magic/images create mode 100644 magic/java create mode 100644 magic/javascript create mode 100644 magic/jpeg create mode 100644 magic/kde create mode 100644 magic/kml create mode 100644 magic/linux create mode 100644 magic/lisp create mode 100644 magic/lua create mode 100644 magic/m4 create mode 100644 magic/macintosh create mode 100644 magic/mail.news create mode 100644 magic/make create mode 100644 magic/marc21 create mode 100644 magic/matroska create mode 100644 magic/misctools create mode 100644 magic/msdos create mode 100644 magic/neko create mode 100644 magic/pascal create mode 100644 magic/pdf create mode 100644 magic/perl create mode 100644 magic/pgp create mode 100644 magic/pkgadd create mode 100644 magic/printer create mode 100644 magic/python create mode 
100644 magic/riff create mode 100644 magic/rpm create mode 100644 magic/rtf create mode 100644 magic/ruby create mode 100644 magic/sc create mode 100644 magic/sgml create mode 100644 magic/sniffer create mode 100644 magic/tcl create mode 100644 magic/tex create mode 100644 magic/troff create mode 100644 magic/vorbis create mode 100644 magic/warc create mode 100644 magic/windows create mode 100644 magic/wordprocessors create mode 100644 magic/xwindows diff --git a/CMakeLists.txt b/CMakeLists.txt index e2a83e10f5..b95b637770 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,12 +17,17 @@ set(BRO_SCRIPT_SOURCE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/scripts) get_filename_component(BRO_SCRIPT_INSTALL_PATH ${BRO_SCRIPT_INSTALL_PATH} ABSOLUTE) +set(BRO_MAGIC_INSTALL_PATH ${BRO_ROOT_DIR}/share/bro/magic) +set(BRO_MAGIC_SOURCE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/magic) + configure_file(bro-path-dev.in ${CMAKE_CURRENT_BINARY_DIR}/bro-path-dev) file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/bro-path-dev.sh "export BROPATH=`${CMAKE_CURRENT_BINARY_DIR}/bro-path-dev`\n" + "export BROMAGIC=\"${BRO_MAGIC_SOURCE_PATH}\"\n" "export PATH=\"${CMAKE_CURRENT_BINARY_DIR}/src\":$PATH\n") file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/bro-path-dev.csh "setenv BROPATH `${CMAKE_CURRENT_BINARY_DIR}/bro-path-dev`\n" + "setenv BROMAGIC \"${BRO_MAGIC_SOURCE_PATH}\"\n" "setenv PATH \"${CMAKE_CURRENT_BINARY_DIR}/src\":$PATH\n") file(STRINGS "${CMAKE_CURRENT_SOURCE_DIR}/VERSION" VERSION LIMIT_COUNT 1) @@ -69,6 +74,12 @@ if (MISSING_PREREQS) message(FATAL_ERROR "Configuration aborted due to missing prerequisites") endif () +set(libmagic_req 5.04) +if ( LibMagic_VERSION VERSION_LESS ${libmagic_req} ) + message(FATAL_ERROR "libmagic of at least version ${libmagic_req} required " + "(found ${LibMagic_VERSION})") +endif () + include_directories(BEFORE ${PCAP_INCLUDE_DIR} ${OpenSSL_INCLUDE_DIR} @@ -190,6 +201,11 @@ CheckOptionalBuildSources(aux/broctl Broctl INSTALL_BROCTL) CheckOptionalBuildSources(aux/bro-aux Bro-Aux 
INSTALL_AUX_TOOLS) CheckOptionalBuildSources(aux/broccoli Broccoli INSTALL_BROCCOLI) +install(DIRECTORY ./magic/ DESTINATION ${BRO_MAGIC_INSTALL_PATH} FILES_MATCHING + PATTERN "COPYING" EXCLUDE + PATTERN "*" +) + ######################################################################## ## Packaging Setup diff --git a/cmake b/cmake index 94e72a3075..1bfdacb892 160000 --- a/cmake +++ b/cmake @@ -1 +1 @@ -Subproject commit 94e72a3075bb0b9550ad05758963afda394bfb2c +Subproject commit 1bfdacb8921ab0b40099f5fde7a611167bf310c3 diff --git a/magic/COPYING b/magic/COPYING new file mode 100644 index 0000000000..7d2bf1e711 --- /dev/null +++ b/magic/COPYING @@ -0,0 +1,29 @@ +# $File: LEGAL.NOTICE,v 1.15 2006/05/03 18:48:33 christos Exp $ +# Copyright (c) Ian F. Darwin 1986, 1987, 1989, 1990, 1991, 1992, 1994, 1995. +# Software written by Ian F. Darwin and others; +# maintained 1994- Christos Zoulas. +# +# This software is not subject to any export provision of the United States +# Department of Commerce, and may be exported to any country or planet. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice immediately at the beginning of the file, without modification, +# this list of conditions, and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR +# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. diff --git a/magic/animation b/magic/animation new file mode 100644 index 0000000000..0cec03d511 --- /dev/null +++ b/magic/animation @@ -0,0 +1,208 @@ +# See COPYING file in this directory for original libmagic copyright. +#------------------------------------------------------------------------------ +# $File: animation,v 1.47 2013/02/06 14:18:52 christos Exp $ +# animation: file(1) magic for animation/movie formats +# +# animation formats +# MPEG, FLI, DL originally from vax@ccwf.cc.utexas.edu (VaX#n8) +# FLC, SGI, Apple originally from Daniel Quinlan (quinlan@yggdrasil.com) + +# SGI and Apple formats +0 string MOVI Silicon Graphics movie file +!:mime video/x-sgi-movie +4 string moov Apple QuickTime +!:mime video/quicktime +4 string mdat Apple QuickTime movie (unoptimized) +!:mime video/quicktime +#4 string wide Apple QuickTime movie (unoptimized) +#!:mime video/quicktime +#4 string skip Apple QuickTime movie (modified) +#!:mime video/quicktime +#4 string free Apple QuickTime movie (modified) +#!:mime video/quicktime +4 string idsc Apple QuickTime image (fast start) +!:mime image/x-quicktime +#4 string idat Apple QuickTime image (unoptimized) +#!:mime image/x-quicktime +4 string pckg Apple QuickTime compressed archive +!:mime application/x-quicktime-player +4 string/W jP JPEG 2000 image +!:mime image/jp2 +4 string ftyp ISO Media +>8 string isom \b, MPEG v4 system, version 1 +!:mime video/mp4 +>8 string mp41 \b, MPEG v4 system, version 1 +!:mime 
video/mp4 +>8 string mp42 \b, MPEG v4 system, version 2 +!:mime video/mp4 +>8 string/W jp2 \b, JPEG 2000 +!:mime image/jp2 +>8 string 3ge \b, MPEG v4 system, 3GPP +!:mime video/3gpp +>8 string 3gg \b, MPEG v4 system, 3GPP +!:mime video/3gpp +>8 string 3gp \b, MPEG v4 system, 3GPP +!:mime video/3gpp +>8 string 3gs \b, MPEG v4 system, 3GPP +!:mime video/3gpp +>8 string 3g2 \b, MPEG v4 system, 3GPP2 +!:mime video/3gpp2 +>8 string mmp4 \b, MPEG v4 system, 3GPP Mobile +!:mime video/mp4 +>8 string avc1 \b, MPEG v4 system, 3GPP JVT AVC +!:mime video/3gpp +>8 string/W M4A \b, MPEG v4 system, iTunes AAC-LC +!:mime audio/mp4 +>8 string/W M4V \b, MPEG v4 system, iTunes AVC-LC +!:mime video/mp4 +>8 string/W qt \b, Apple QuickTime movie +!:mime video/quicktime + +# MPEG sequences +# Scans for all common MPEG header start codes +0 belong&0xFFFFFF00 0x00000100 +>3 byte 0xBA MPEG sequence +!:mime video/mpeg +# GRR too general as it catches also FoxPro Memo example NG.FPT +>3 byte 0xB0 MPEG sequence, v4 +!:mime video/mpeg4-generic +>3 byte 0xB5 MPEG sequence, v4 +!:mime video/mpeg4-generic +>3 byte 0xB3 MPEG sequence +!:mime video/mpeg + +# MPEG ADTS Audio (*.mpx/mxa/aac) +# from dreesen@math.fu-berlin.de +# modified to fully support MPEG ADTS + +# MP3, M1A +# modified by Joerg Jenderek +# GRR the original test are too common for many DOS files +# so don't accept as MP3 until we've tested the rate +0 beshort&0xFFFE 0xFFFA +# rates +>2 byte&0xF0 0x10 MPEG ADTS, layer III, v1, 32 kbps +!:mime audio/mpeg +>2 byte&0xF0 0x20 MPEG ADTS, layer III, v1, 40 kbps +!:mime audio/mpeg +>2 byte&0xF0 0x30 MPEG ADTS, layer III, v1, 48 kbps +!:mime audio/mpeg +>2 byte&0xF0 0x40 MPEG ADTS, layer III, v1, 56 kbps +!:mime audio/mpeg +>2 byte&0xF0 0x50 MPEG ADTS, layer III, v1, 64 kbps +!:mime audio/mpeg +>2 byte&0xF0 0x60 MPEG ADTS, layer III, v1, 80 kbps +!:mime audio/mpeg +>2 byte&0xF0 0x70 MPEG ADTS, layer III, v1, 96 kbps +!:mime audio/mpeg +>2 byte&0xF0 0x80 MPEG ADTS, layer III, v1, 112 kbps 
+!:mime audio/mpeg +>2 byte&0xF0 0x90 MPEG ADTS, layer III, v1, 128 kbps +!:mime audio/mpeg +>2 byte&0xF0 0xA0 MPEG ADTS, layer III, v1, 160 kbps +!:mime audio/mpeg +>2 byte&0xF0 0xB0 MPEG ADTS, layer III, v1, 192 kbps +!:mime audio/mpeg +>2 byte&0xF0 0xC0 MPEG ADTS, layer III, v1, 224 kbps +!:mime audio/mpeg +>2 byte&0xF0 0xD0 MPEG ADTS, layer III, v1, 256 kbps +!:mime audio/mpeg +>2 byte&0xF0 0xE0 MPEG ADTS, layer III, v1, 320 kbps +!:mime audio/mpeg + +# MP2, M1A +0 beshort&0xFFFE 0xFFFC MPEG ADTS, layer II, v1 +!:mime audio/mpeg + +# MP3, M2A +0 beshort&0xFFFE 0xFFF2 MPEG ADTS, layer III, v2 +!:mime audio/mpeg + +# MPA, M2A +0 beshort&0xFFFE 0xFFF6 MPEG ADTS, layer I, v2 +!:mime audio/mpeg + +# MP3, M25A +0 beshort&0xFFFE 0xFFE2 MPEG ADTS, layer III, v2.5 +!:mime audio/mpeg + +# Stored AAC streams (instead of the MP4 format) +0 string ADIF MPEG ADIF, AAC +!:mime audio/x-hx-aac-adif + +# Live or stored single AAC stream (used with MPEG-2 systems) +0 beshort&0xFFF6 0xFFF0 MPEG ADTS, AAC +!:mime audio/x-hx-aac-adts + +# Live MPEG-4 audio streams (instead of RTP FlexMux) +0 beshort&0xFFE0 0x56E0 MPEG-4 LOAS +!:mime audio/x-mp4a-latm + +# This magic isn't strong enough (matches plausible ISO-8859-1 text) +#0 beshort 0x4DE1 MPEG-4 LO-EP audio stream +#!:mime audio/x-mp4a-latm + +# Summary: FLI animation format +# Created by: Daniel Quinlan +# Modified by (1): Abel Cheung (avoid over-generic detection) +4 leshort 0xAF11 +# standard FLI always has 320x200 resolution and 8 bit color +>8 leshort 320 +>>10 leshort 200 +>>>12 leshort 8 FLI animation, 320x200x8 +!:mime video/x-fli + +# Summary: FLC animation format +# Created by: Daniel Quinlan +# Modified by (1): Abel Cheung (avoid over-generic detection) +4 leshort 0xAF12 +# standard FLC always use 8 bit color +>12 leshort 8 FLC animation +!:mime video/x-flc + +# Microsoft Advanced Streaming Format (ASF) +0 belong 0x3026b275 Microsoft ASF +!:mime video/x-ms-asf + +# MNG Video Format, +0 string \x8aMNG MNG video data, 
+!:mime video/x-mng + +# JNG Video Format, +0 string \x8bJNG JNG video data, +!:mime video/x-jng + +# VRML (Virtual Reality Modelling Language) +0 string/w #VRML\ V1.0\ ascii VRML 1 file +!:mime model/vrml +0 string/w #VRML\ V2.0\ utf8 ISO/IEC 14772 VRML 97 file +!:mime model/vrml + +# X3D (Extensible 3D) [http://www.web3d.org/specifications/x3d-3.0.dtd] +# From Michel Briand +0 string/t \20 search/1000/cw \4 byte &0x40 +!:mime video/mp2p +>4 byte ^0x40 +!:mime video/mpeg +0 belong 0x000001BB +!:mime video/mpeg +0 belong 0x000001B0 +!:mime video/mp4v-es +0 belong 0x000001B5 +!:mime video/mp4v-es +0 belong 0x000001B3 +!:mime video/mpv +0 belong&0xFF5FFF1F 0x47400010 +!:mime video/mp2t +0 belong 0x00000001 +>4 byte&0x1F 0x07 +!:mime video/h264 diff --git a/magic/archive b/magic/archive new file mode 100644 index 0000000000..35cbef4012 --- /dev/null +++ b/magic/archive @@ -0,0 +1,242 @@ +# See COPYING file in this directory for original libmagic copyright. +#------------------------------------------------------------------------------ +# $File: archive,v 1.78 2013/02/06 14:18:52 christos Exp $ +# archive: file(1) magic for archive formats (see also "msdos" for self- +# extracting compressed archives) +# +# cpio, ar, arc, arj, hpack, lha/lharc, rar, squish, uc2, zip, zoo, etc. +# pre-POSIX "tar" archives are handled in the C code. + +# POSIX tar archives +257 string ustar\0 POSIX tar archive +!:mime application/x-tar # encoding: posix +257 string ustar\040\040\0 GNU tar archive +!:mime application/x-tar # encoding: gnu + +# cpio archives +# +# Yes, the top two "cpio archive" formats *are* supposed to just be "short". +# The idea is to indicate archives produced on machines with the same +# byte order as the machine running "file" with "cpio archive", and +# to indicate archives produced on machines with the opposite byte order +# from the machine running "file" with "byte-swapped cpio archive". 
+# +# The SVR4 "cpio(4)" hints that there are additional formats, but they +# are defined as "short"s; I think all the new formats are +# character-header formats and thus are strings, not numbers. +0 short 070707 cpio archive +!:mime application/x-cpio +0 short 0143561 byte-swapped cpio archive +!:mime application/x-cpio # encoding: swapped + +# +# System V Release 1 portable(?) archive format. +# +0 string =<ar> System V Release 1 ar archive +!:mime application/x-archive + +# +# Debian package; it's in the portable archive format, and needs to go +# before the entry for regular portable archives, as it's recognized as +# a portable archive whose first member has a name beginning with +# "debian". +# +0 string =!<arch>\ndebian +!:mime application/x-debian-package + +# +# MIPS archive; they're in the portable archive format, and need to go +# before the entry for regular portable archives, as it's recognized as +# a portable archive whose first member has a name beginning with +# "__________E". +# +0 string =!<arch>\n__________E MIPS archive +!:mime application/x-archive + +# +# BSD/SVR2-and-later portable archive formats. +# +0 string =!<arch> current ar archive +!:mime application/x-archive + +# ARC archiver, from Daniel Quinlan (quinlan@yggdrasil.com) +# +# The first byte is the magic (0x1a), byte 2 is the compression type for +# the first file (0x01 through 0x09), and bytes 3 to 15 are the MS-DOS +# filename of the first file (null terminated). Since some types collide +# we only test some types on basis of frequency: 0x08 (83%), 0x09 (5%), +# 0x02 (5%), 0x03 (3%), 0x04 (2%), 0x06 (2%). 0x01 collides with terminfo.
+0 lelong&0x8080ffff 0x0000081a ARC archive data, dynamic LZW +!:mime application/x-arc +0 lelong&0x8080ffff 0x0000091a ARC archive data, squashed +!:mime application/x-arc +0 lelong&0x8080ffff 0x0000021a ARC archive data, uncompressed +!:mime application/x-arc +0 lelong&0x8080ffff 0x0000031a ARC archive data, packed +!:mime application/x-arc +0 lelong&0x8080ffff 0x0000041a ARC archive data, squeezed +!:mime application/x-arc +0 lelong&0x8080ffff 0x0000061a ARC archive data, crunched +!:mime application/x-arc +# [JW] stuff taken from idarc, obviously ARC successors: +0 lelong&0x8080ffff 0x00000a1a PAK archive data +!:mime application/x-arc +0 lelong&0x8080ffff 0x0000141a ARC+ archive data +!:mime application/x-arc +0 lelong&0x8080ffff 0x0000481a HYP archive data +!:mime application/x-arc + +# ARJ archiver (jason@jarthur.Claremont.EDU) +0 leshort 0xea60 ARJ archive data +!:mime application/x-arj + +# LHARC/LHA archiver (Greg Roelofs, newt@uchicago.edu) +2 string -lh0- LHarc 1.x/ARX archive data [lh0] +!:mime application/x-lharc +2 string -lh1- LHarc 1.x/ARX archive data [lh1] +!:mime application/x-lharc +2 string -lz4- LHarc 1.x archive data [lz4] +!:mime application/x-lharc +2 string -lz5- LHarc 1.x archive data [lz5] +!:mime application/x-lharc +# [never seen any but the last; -lh4- reported in comp.compression:] +2 string -lzs- LHa/LZS archive data [lzs] +!:mime application/x-lha +2 string -lh\40- LHa 2.x? archive data [lh ] +!:mime application/x-lha +2 string -lhd- LHa 2.x? archive data [lhd] +!:mime application/x-lha +2 string -lh2- LHa 2.x? archive data [lh2] +!:mime application/x-lha +2 string -lh3- LHa 2.x? 
archive data [lh3] +!:mime application/x-lha +2 string -lh4- LHa (2.x) archive data [lh4] +!:mime application/x-lha +2 string -lh5- LHa (2.x) archive data [lh5] +!:mime application/x-lha +2 string -lh6- LHa (2.x) archive data [lh6] +!:mime application/x-lha +2 string -lh7- LHa (2.x)/LHark archive data [lh7] +!:mime application/x-lha + +# RAR archiver (Greg Roelofs, newt@uchicago.edu) +0 string Rar! RAR archive data, +!:mime application/x-rar + +# PKZIP multi-volume archive +0 string PK\x07\x08PK\x03\x04 Zip multi-volume archive data, at least PKZIP v2.50 to extract +!:mime application/zip + +# Zip archives (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu) +0 string PK\003\004 + +# Specialised zip formats which start with a member named 'mimetype' +# (stored uncompressed, with no 'extra field') containing the file's MIME type. +# Check for have 8-byte name, 0-byte extra field, name "mimetype", and +# contents starting with "application/": +>26 string \x8\0\0\0mimetypeapplication/ + +# OpenDocument formats (for OpenOffice 2.x / StarOffice >= 8) +# http://lists.oasis-open.org/archives/office/200505/msg00006.html +# (mimetype contains "application/vnd.oasis.opendocument.") +>>50 string vnd.oasis.opendocument. 
OpenDocument +>>>73 string text +>>>>77 byte !0x2d Text +!:mime application/vnd.oasis.opendocument.text +>>>>77 string -template Text Template +!:mime application/vnd.oasis.opendocument.text-template +>>>>77 string -web HTML Document Template +!:mime application/vnd.oasis.opendocument.text-web +>>>>77 string -master Master Document +!:mime application/vnd.oasis.opendocument.text-master +>>>73 string graphics +>>>>81 byte !0x2d Drawing +!:mime application/vnd.oasis.opendocument.graphics +>>>>81 string -template Template +!:mime application/vnd.oasis.opendocument.graphics-template +>>>73 string presentation +>>>>85 byte !0x2d Presentation +!:mime application/vnd.oasis.opendocument.presentation +>>>>85 string -template Template +!:mime application/vnd.oasis.opendocument.presentation-template +>>>73 string spreadsheet +>>>>84 byte !0x2d Spreadsheet +!:mime application/vnd.oasis.opendocument.spreadsheet +>>>>84 string -template Template +!:mime application/vnd.oasis.opendocument.spreadsheet-template +>>>73 string chart +>>>>78 byte !0x2d Chart +!:mime application/vnd.oasis.opendocument.chart +>>>>78 string -template Template +!:mime application/vnd.oasis.opendocument.chart-template +>>>73 string formula +>>>>80 byte !0x2d Formula +!:mime application/vnd.oasis.opendocument.formula +>>>>80 string -template Template +!:mime application/vnd.oasis.opendocument.formula-template +>>>73 string database Database +!:mime application/vnd.oasis.opendocument.database +>>>73 string image +>>>>78 byte !0x2d Image +!:mime application/vnd.oasis.opendocument.image +>>>>78 string -template Template +!:mime application/vnd.oasis.opendocument.image-template + +# EPUB (OEBPS) books using OCF (OEBPS Container Format) +# http://www.idpf.org/ocf/ocf1.0/download/ocf10.htm, section 4. 
+# From: Ralf Brown +>0x1E string mimetypeapplication/epub+zip EPUB document +!:mime application/epub+zip + +# Catch other ZIP-with-mimetype formats +# In a ZIP file, the bytes immediately after a member's contents are +# always "PK". The 2 regex rules here print the "mimetype" member's +# contents up to the first 'P'. Luckily, most MIME types don't contain +# any capital 'P's. This is a kludge. +# (mimetype contains "application/") +>>50 string !epub+zip +>>>50 string !vnd.oasis.opendocument. +>>>>50 string !vnd.sun.xml. +>>>>>50 string !vnd.kde. +>>>>>>38 regex [!-OQ-~]+ Zip data (MIME type "%s"?) +!:mime application/zip +# (mimetype contents other than "application/*") +>26 string \x8\0\0\0mimetype +>>38 string !application/ +>>>38 regex [!-OQ-~]+ Zip data (MIME type "%s"?) +!:mime application/zip + +# Java Jar files +>(26.s+30) leshort 0xcafe Java Jar file data (zip) +!:mime application/jar + +# Generic zip archives (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu) +# Next line excludes specialized formats: +>(26.s+30) leshort !0xcafe +>>26 string !\x8\0\0\0mimetype Zip archive data +!:mime application/zip + +# Zoo archiver +20 lelong 0xfdc4a7dc Zoo archive data +!:mime application/x-zoo + +# Shell archives +10 string #\ This\ is\ a\ shell\ archive shell archive text +!:mime application/octet-stream + +# Felix von Leitner +0 string d8:announce BitTorrent file +!:mime application/x-bittorrent + +# EET archive +# From: Tilman Sauerbeck +0 belong 0x1ee7ff00 EET archive +!:mime application/x-eet + +# Symbian installation files +# http://www.thouky.co.uk/software/psifs/sis.html +# http://developer.symbian.com/main/downloads/papers/SymbianOSv91/softwareinstallsis.pdf +8 lelong 0x10000419 Symbian installation file +!:mime application/vnd.symbian.install +0 lelong 0x10201A7A Symbian installation file (Symbian OS 9.x) +!:mime x-epoc/x-sisx-app diff --git a/magic/assembler b/magic/assembler new file mode 100644 index 0000000000..242b6e19e2 --- /dev/null +++ b/magic/assembler 
@@ -0,0 +1,19 @@ +# See COPYING file in this directory for original libmagic copyright. +#------------------------------------------------------------------------------ +# $File: assembler,v 1.3 2013/01/04 17:23:28 christos Exp $ +# assembler: file(1) magic for assembler source +# +0 regex \^[\020\t]*\\.asciiz assembler source text +!:mime text/x-asm +0 regex \^[\020\t]*\\.byte assembler source text +!:mime text/x-asm +0 regex \^[\020\t]*\\.even assembler source text +!:mime text/x-asm +0 regex \^[\020\t]*\\.globl assembler source text +!:mime text/x-asm +0 regex \^[\020\t]*\\.text assembler source text +!:mime text/x-asm +0 regex \^[\020\t]*\\.file assembler source text +!:mime text/x-asm +0 regex \^[\020\t]*\\.type assembler source text +!:mime text/x-asm diff --git a/magic/audio b/magic/audio new file mode 100644 index 0000000000..75a9dc536c --- /dev/null +++ b/magic/audio @@ -0,0 +1,149 @@ +# See COPYING file in this directory for original libmagic copyright. +#------------------------------------------------------------------------------ +# $File: audio,v 1.65 2012/10/31 13:38:40 christos Exp $ +# audio: file(1) magic for sound formats (see also "iff") +# +# Jan Nicolai Langfeldt (janl@ifi.uio.no), Dan Quinlan (quinlan@yggdrasil.com), +# and others +# + +# Sun/NeXT audio data +0 string .snd Sun/NeXT audio data: +>12 belong 1 8-bit ISDN mu-law, +!:mime audio/basic +>12 belong 2 8-bit linear PCM [REF-PCM], +!:mime audio/basic +>12 belong 3 16-bit linear PCM, +!:mime audio/basic +>12 belong 4 24-bit linear PCM, +!:mime audio/basic +>12 belong 5 32-bit linear PCM, +!:mime audio/basic +>12 belong 6 32-bit IEEE floating point, +!:mime audio/basic +>12 belong 7 64-bit IEEE floating point, +!:mime audio/basic +>12 belong 23 8-bit ISDN mu-law compressed (CCITT G.721 ADPCM voice enc.), +!:mime audio/x-adpcm + +# DEC systems (e.g.
DECstation 5000) use a variant of the Sun/NeXT format +# that uses little-endian encoding and has a different magic number +0 lelong 0x0064732E DEC audio data: +>12 lelong 1 8-bit ISDN mu-law, +!:mime audio/x-dec-basic +>12 lelong 2 8-bit linear PCM [REF-PCM], +!:mime audio/x-dec-basic +>12 lelong 3 16-bit linear PCM, +!:mime audio/x-dec-basic +>12 lelong 4 24-bit linear PCM, +!:mime audio/x-dec-basic +>12 lelong 5 32-bit linear PCM, +!:mime audio/x-dec-basic +>12 lelong 6 32-bit IEEE floating point, +!:mime audio/x-dec-basic +>12 lelong 7 64-bit IEEE floating point, +!:mime audio/x-dec-basic +>12 lelong 23 8-bit ISDN mu-law compressed (CCITT G.721 ADPCM voice enc.), +!:mime audio/x-dec-basic + +# Creative Labs AUDIO stuff +0 string MThd Standard MIDI data +!:mime audio/midi + +0 string CTMF Creative Music (CMF) data +!:mime audio/x-unknown +0 string SBI SoundBlaster instrument data +!:mime audio/x-unknown +0 string Creative\ Voice\ File Creative Labs voice data +!:mime audio/x-unknown + +# Real Audio (Magic .ra\0375) +0 belong 0x2e7261fd RealAudio sound file +!:mime audio/x-pn-realaudio +0 string .RMF\0\0\0 RealMedia file +!:mime application/vnd.rn-realmedia + +# mime types according to http://www.geocities.com/nevilo/mod.htm: +# audio/it .it +# audio/x-zipped-it .itz +# audio/xm fasttracker modules +# audio/x-s3m screamtracker modules +# audio/s3m screamtracker modules +# audio/x-zipped-mod mdz +# audio/mod mod +# audio/x-mod All modules (mod, s3m, 669, mtm, med, xm, it, mdz, stm, itz, xmz, s3z) + +# +# Taken from loader code from mikmod version 2.14 +# by Steve McIntyre (stevem@chiark.greenend.org.uk) +# added title printing on 2003-06-24 +0 string MAS_UTrack_V00 +>14 string >/0 ultratracker V1.%.1s module sound data +!:mime audio/x-mod +#audio/x-tracker-module + +0 string Extended\ Module: Fasttracker II module sound data +!:mime audio/x-mod +#audio/x-tracker-module + +21 string/c =!SCREAM! 
Screamtracker 2 module sound data +!:mime audio/x-mod +#audio/x-screamtracker-module +21 string BMOD2STM Screamtracker 2 module sound data +!:mime audio/x-mod +#audio/x-screamtracker-module +1080 string M.K. 4-channel Protracker module sound data +!:mime audio/x-mod +#audio/x-protracker-module +1080 string M!K! 4-channel Protracker module sound data +!:mime audio/x-mod +#audio/x-protracker-module +1080 string FLT4 4-channel Startracker module sound data +!:mime audio/x-mod +#audio/x-startracker-module +1080 string FLT8 8-channel Startracker module sound data +!:mime audio/x-mod +#audio/x-startracker-module +1080 string 4CHN 4-channel Fasttracker module sound data +!:mime audio/x-mod +#audio/x-fasttracker-module +1080 string 6CHN 6-channel Fasttracker module sound data +!:mime audio/x-mod +#audio/x-fasttracker-module +1080 string 8CHN 8-channel Fasttracker module sound data +!:mime audio/x-mod +#audio/x-fasttracker-module +1080 string CD81 8-channel Octalyser module sound data +!:mime audio/x-mod +#audio/x-octalysertracker-module +1080 string OKTA 8-channel Octalyzer module sound data +!:mime audio/x-mod +#audio/x-octalysertracker-module +# Not good enough. 
+#1082 string CH +#>1080 string >/0 %.2s-channel Fasttracker "oktalyzer" module sound data +1080 string 16CN 16-channel Taketracker module sound data +!:mime audio/x-mod +#audio/x-taketracker-module +1080 string 32CN 32-channel Taketracker module sound data +!:mime audio/x-mod +#audio/x-taketracker-module + +# Impulse tracker module (audio/x-it) +0 string IMPM Impulse Tracker module sound data - +!:mime audio/x-mod + +# Free lossless audio codec +# From: Przemyslaw Augustyniak +0 string fLaC FLAC audio bitstream data +!:mime audio/x-flac + +# Monkey's Audio compressed audio format (.ape) +# From danny.milo@gmx.net (Danny Milosavljevic) +# New version from Abel Cheung +0 string MAC\040 Monkey's Audio compressed format +!:mime audio/x-ape + +# musepak support From: "Jiri Pejchal" +0 string MP+ Musepack audio +!:mime audio/x-musepack diff --git a/magic/c-lang b/magic/c-lang new file mode 100644 index 0000000000..525dc6b599 --- /dev/null +++ b/magic/c-lang @@ -0,0 +1,47 @@ +# See COPYING file in this directory for original libmagic copyright. 
+#------------------------------------------------------------------------------ +# $File: c-lang,v 1.16 2011/12/09 08:02:16 rrt Exp $ +# c-lang: file(1) magic for C and related languages programs +# + +# BCPL +0 search/8192 "libhdr" BCPL source text +!:mime text/x-bcpl +0 search/8192 "LIBHDR" BCPL source text +!:mime text/x-bcpl + +# C +0 regex \^#include C source text +!:mime text/x-c +0 regex \^char C source text +!:mime text/x-c +0 regex \^double C source text +!:mime text/x-c +0 regex \^extern C source text +!:mime text/x-c +0 regex \^float C source text +!:mime text/x-c +0 regex \^struct C source text +!:mime text/x-c +0 regex \^union C source text +!:mime text/x-c +0 search/8192 main( C source text +!:mime text/x-c + +# C++ +# The strength of these rules is increased so they beat the C rules above +0 regex \^template C++ source text +!:strength + 5 +!:mime text/x-c++ +0 regex \^virtual C++ source text +!:strength + 5 +!:mime text/x-c++ +0 regex \^class C++ source text +!:strength + 5 +!:mime text/x-c++ +0 regex \^public: C++ source text +!:strength + 5 +!:mime text/x-c++ +0 regex \^private: C++ source text +!:strength + 5 +!:mime text/x-c++ diff --git a/magic/cafebabe b/magic/cafebabe new file mode 100644 index 0000000000..29fefd5f1e --- /dev/null +++ b/magic/cafebabe @@ -0,0 +1,31 @@ +# See COPYING file in this directory for original libmagic copyright. +#------------------------------------------------------------------------------ +# $File: cafebabe,v 1.13 2013/02/26 21:04:38 christos Exp $ +# Cafe Babes unite! +# +# Since Java bytecode and Mach-O universal binaries have the same magic number, +# the test must be performed in the same "magic" sequence to get both right. +# The long at offset 4 in a Mach-O universal binary tells the number of +# architectures; the short at offset 4 in a Java bytecode file is the JVM minor +# version and the short at offset 6 is the JVM major version. 
Since there are +# only 18 labeled Mach-O architectures at current, and the first released +# Java class format was version 43.0, we can safely choose any number +# between 18 and 39 to test the number of architectures against +# (and use as a hack). Let's not use 18, because the Mach-O people +# might add another one or two as time goes by... +# +### JAVA START ### +0 belong 0xcafebabe +!:mime application/x-java-applet + +0 belong 0xcafed00d JAR compressed with pack200, +>5 byte x version %d. +>4 byte x \b%d +!:mime application/x-java-pack200 + +0 belong 0xcafed00d JAR compressed with pack200, +>5 byte x version %d. +>4 byte x \b%d +!:mime application/x-java-pack200 + +### JAVA END ### diff --git a/magic/commands b/magic/commands new file mode 100644 index 0000000000..6ad7699c5e --- /dev/null +++ b/magic/commands @@ -0,0 +1,82 @@ +# See COPYING file in this directory for original libmagic copyright. +#------------------------------------------------------------------------------ +# $File: commands,v 1.44 2013/02/05 15:20:47 christos Exp $ +# commands: file(1) magic for various shells and interpreters +# +#0 string/w : shell archive or script for antique kernel text +0 string/wt #!\ /bin/sh POSIX shell script text executable +!:mime text/x-shellscript +0 string/wt #!\ /bin/csh C shell script text executable +!:mime text/x-shellscript +# korn shell magic, sent by George Wu, gwu@clyde.att.com +0 string/wt #!\ /bin/ksh Korn shell script text executable +!:mime text/x-shellscript +0 string/wt #!\ /bin/tcsh Tenex C shell script text executable +!:mime text/x-shellscript +0 string/wt #!\ /usr/bin/tcsh Tenex C shell script text executable +!:mime text/x-shellscript +0 string/wt #!\ /usr/local/tcsh Tenex C shell script text executable +!:mime text/x-shellscript +0 string/wt #!\ /usr/local/bin/tcsh Tenex C shell script text executable +!:mime text/x-shellscript + +# +# zsh/ash/ae/nawk/gawk magic from cameron@cs.unsw.oz.au (Cameron Simpson) +0 string/wt #!\ /bin/zsh
Paul Falstad's zsh script text executable +!:mime text/x-shellscript +0 string/wt #!\ /usr/bin/zsh Paul Falstad's zsh script text executable +!:mime text/x-shellscript +0 string/wt #!\ /usr/local/bin/zsh Paul Falstad's zsh script text executable +!:mime text/x-shellscript +0 string/wt #!\ /usr/local/bin/ash Neil Brown's ash script text executable +!:mime text/x-shellscript +0 string/wt #!\ /usr/local/bin/ae Neil Brown's ae script text executable +!:mime text/x-shellscript +0 string/wt #!\ /bin/nawk new awk script text executable +!:mime text/x-nawk +0 string/wt #!\ /usr/bin/nawk new awk script text executable +!:mime text/x-nawk +0 string/wt #!\ /usr/local/bin/nawk new awk script text executable +!:mime text/x-nawk +0 string/wt #!\ /bin/gawk GNU awk script text executable +!:mime text/x-gawk +0 string/wt #!\ /usr/bin/gawk GNU awk script text executable +!:mime text/x-gawk +0 string/wt #!\ /usr/local/bin/gawk GNU awk script text executable +!:mime text/x-gawk +# +0 string/wt #!\ /bin/awk awk script text executable +!:mime text/x-awk +0 string/wt #!\ /usr/bin/awk awk script text executable +!:mime text/x-awk + +# bash shell magic, from Peter Tobias (tobias@server.et-inf.fho-emden.de) +0 string/wt #!\ /bin/bash Bourne-Again shell script text executable +!:mime text/x-shellscript +0 string/wt #!\ /usr/bin/bash Bourne-Again shell script text executable +!:mime text/x-shellscript +0 string/wt #!\ /usr/local/bash Bourne-Again shell script text executable +!:mime text/x-shellscript +0 string/wt #!\ /usr/local/bin/bash Bourne-Again shell script text executable +!:mime text/x-shellscript + +# PHP scripts +# Ulf Harnhammar +0 search/1/c = +0 string =24 regex [0-9.]+ \b, version %s +!:mime text/x-php diff --git a/magic/compress b/magic/compress new file mode 100644 index 0000000000..f2598b783f --- /dev/null +++ b/magic/compress @@ -0,0 +1,77 @@ +# See COPYING file in this directory for original libmagic copyright. 
+#------------------------------------------------------------------------------ +# $File: compress,v 1.48 2011/12/07 18:39:43 christos Exp $ +# compress: file(1) magic for pure-compression formats (no archives) +# +# compress, gzip, pack, compact, huf, squeeze, crunch, freeze, yabba, etc. +# +# Formats for various forms of compressed data +# Formats for "compress" proper have been moved into "compress.c", +# because it tries to uncompress it to figure out what's inside. + +# standard unix compress +0 string \037\235 compress'd data +!:mime application/x-compress +!:apple LZIVZIVU + +# gzip (GNU zip, not to be confused with Info-ZIP or PKWARE zip archiver) +# Edited by Chris Chittleborough , March 2002 +# * Original filename is only at offset 10 if "extra field" absent +# * Produce shorter output - notably, only report compression methods +# other than 8 ("deflate", the only method defined in RFC 1952). +0 string \037\213 gzip compressed data +!:mime application/x-gzip + +# packed data, Huffman (minimum redundancy) codes on a byte-by-byte basis +0 string \037\036 packed data +!:mime application/octet-stream + +# +# This magic number is byte-order-independent. +0 short 0x1f1f old packed data +!:mime application/octet-stream + +# XXX - why *two* entries for "compacted data", one of which is +# byte-order independent, and one of which is byte-order dependent? +# +0 short 0x1fff compacted data +!:mime application/octet-stream +# This string is valid for SunOS (BE) and a matching "short" is listed +# in the Ultrix (LE) magic file. 
+0 string \377\037 compacted data +!:mime application/octet-stream +0 short 0145405 huf output +!:mime application/octet-stream + +# bzip2 +0 string BZh bzip2 compressed data +!:mime application/x-bzip2 + +# lzip +0 string LZIP lzip compressed data +!:mime application/x-lzip + +# 7-zip archiver, from Thomas Klausner (wiz@danbala.tuwien.ac.at) +# http://www.7-zip.org or DOC/7zFormat.txt +# +0 string 7z\274\257\047\034 7-zip archive data, +>6 byte x version %d +>7 byte x \b.%d +!:mime application/x-7z-compressed + +# Type: LZMA +0 lelong&0xffffff =0x5d +>12 leshort =0xff LZMA compressed data, +>>5 lequad =0xffffffffffffffff streamed +>>5 lequad !0xffffffffffffffff non-streamed, size %lld +!:mime application/x-lzma + +# http://tukaani.org/xz/xz-file-format.txt +0 ustring \xFD7zXZ\x00 XZ compressed data +!:mime application/x-xz + +# https://github.com/ckolivas/lrzip/blob/master/doc/magic.header.txt +0 string LRZI LRZIP compressed data +>4 byte x - version %d +>5 byte x \b.%d +!:mime application/x-lrzip diff --git a/magic/database b/magic/database new file mode 100644 index 0000000000..f1c09c0629 --- /dev/null +++ b/magic/database @@ -0,0 +1,47 @@ +# See COPYING file in this directory for original libmagic copyright. +#------------------------------------------------------------------------------ +# $File: database,v 1.32 2013/02/06 14:18:52 christos Exp $ +# database: file(1) magic for various databases +# +# extracted from header/code files by Graeme Wilford (eep2gw@ee.surrey.ac.uk) +# +# +# GDBM magic numbers +# Will be maintained as part of the GDBM distribution in the future. +# +0 belong 0x13579ace GNU dbm 1.x or ndbm database, big endian +!:mime application/x-gdbm +0 lelong 0x13579ace GNU dbm 1.x or ndbm database, little endian +!:mime application/x-gdbm +0 string GDBM GNU dbm 2.x database +!:mime application/x-gdbm +# +# Berkeley DB +# +# Ian Darwin's file /etc/magic files: big/little-endian version. 
+# +# Hash 1.85/1.86 databases store metadata in network byte order. +# Btree 1.85/1.86 databases store the metadata in host byte order. +# Hash and Btree 2.X and later databases store the metadata in host byte order. + +0 long 0x00061561 Berkeley DB +!:mime application/x-dbm + +# MS Access database +4 string Standard\ Jet\ DB Microsoft Access Database +!:mime application/x-msaccess +4 string Standard\ ACE\ DB Microsoft Access Database +!:mime application/x-msaccess + +# Tokyo Cabinet magic data +# http://tokyocabinet.sourceforge.net/index.html +0 string ToKyO\ CaBiNeT\n Tokyo Cabinet +>14 string x \b (%s) +>32 byte 0 \b, Hash +!:mime application/x-tokyocabinet-hash +>32 byte 1 \b, B+ tree +!:mime application/x-tokyocabinet-btree +>32 byte 2 \b, Fixed-length +!:mime application/x-tokyocabinet-fixed +>32 byte 3 \b, Table +!:mime application/x-tokyocabinet-table diff --git a/magic/diff b/magic/diff new file mode 100644 index 0000000000..b6504f17a0 --- /dev/null +++ b/magic/diff @@ -0,0 +1,25 @@ +# See COPYING file in this directory for original libmagic copyright. +#------------------------------------------------------------------------------ +# $File: diff,v 1.13 2012/06/16 14:43:36 christos Exp $ +# diff: file(1) magic for diff(1) output +# +0 search/1 diff\ diff output text +!:mime text/x-diff +0 search/1 ***\ diff output text +!:mime text/x-diff +0 search/1 Only\ in\ diff output text +!:mime text/x-diff +0 search/1 Common\ subdirectories:\ diff output text +!:mime text/x-diff + +0 search/1 Index: RCS/CVS diff output text +!:mime text/x-diff + +# unified diff +0 search/4096 ---\ +>&0 search/1024 \n +>>&0 search/1 +++\ +>>>&0 search/1024 \n +>>>>&0 search/1 @@ unified diff output text +!:mime text/x-diff +!:strength + 90 diff --git a/magic/elf b/magic/elf new file mode 100644 index 0000000000..aaf80cf10e --- /dev/null +++ b/magic/elf @@ -0,0 +1,43 @@ +# See COPYING file in this directory for original libmagic copyright. 
+#------------------------------------------------------------------------------ +# elf: file(1) magic for ELF executables +# +# We have to check the byte order flag to see what byte order all the +# other stuff in the header is in. +# +# What're the correct byte orders for the nCUBE and the Fujitsu VPP500? +# +# Created by: unknown +# Modified by (1): Daniel Quinlan +# Modified by (2): Peter Tobias (core support) +# Modified by (3): Christian 'Dr. Disk' Hechelmann (fix of core support) +# Modified by (4): (VMS Itanium) +# Modified by (5): Matthias Urlichs (Listing of many architectures) +0 string \177ELF ELF +>4 byte 0 invalid class +>4 byte 1 32-bit +>4 byte 2 64-bit +>5 byte 0 invalid byte order +>5 byte 1 LSB +>>16 leshort 0 no file type, +!:strength *2 +!:mime application/octet-stream +>>16 leshort 1 relocatable, +!:mime application/x-object +>>16 leshort 2 executable, +!:mime application/x-executable +>>16 leshort 3 shared object, +!:mime application/x-sharedlib +>>16 leshort 4 core file +!:mime application/x-coredump +>5 byte 2 MSB +>>16 beshort 0 no file type, +!:mime application/octet-stream +>>16 beshort 1 relocatable, +!:mime application/x-object +>>16 beshort 2 executable, +!:mime application/x-executable +>>16 beshort 3 shared object, +!:mime application/x-sharedlib +>>16 beshort 4 core file, +!:mime application/x-coredump diff --git a/magic/epoc b/magic/epoc new file mode 100644 index 0000000000..d7397145fb --- /dev/null +++ b/magic/epoc @@ -0,0 +1,34 @@ +# See COPYING file in this directory for original libmagic copyright. 
+#------------------------------------------------------------------------------ +# $File: epoc,v 1.7 2009/09/19 16:28:09 christos Exp $ +# EPOC : file(1) magic for EPOC documents [Psion Series 5/Osaris/Geofox 1] +# Stefan Praszalowicz and Peter Breitenlohner +# Useful information for improving this file can be found at: +# http://software.frodo.looijaard.name/psiconv/formats/Index.html +#------------------------------------------------------------------------------ +0 lelong 0x10000037 Psion Series 5 +>4 lelong 0x10000042 multi-bitmap image +!:mime image/x-epoc-mbm +>4 lelong 0x1000006D +>>8 lelong 0x1000007D Sketch image +!:mime image/x-epoc-sketch +>>8 lelong 0x1000007F Word file +!:mime application/x-epoc-word +>>8 lelong 0x10000085 OPL program (TextEd) +!:mime application/x-epoc-opl +>>8 lelong 0x10000088 Sheet file +!:mime application/x-epoc-sheet +>4 lelong 0x10000073 OPO module +!:mime application/x-epoc-opo +>4 lelong 0x10000074 OPL application +!:mime application/x-epoc-app + + +0 lelong 0x10000050 Psion Series 5 +>4 lelong 0x1000006D database +>>8 lelong 0x10000084 Agenda file +!:mime application/x-epoc-agenda +>>8 lelong 0x10000086 Data file +!:mime application/x-epoc-data +>>8 lelong 0x10000CEA Jotter file +!:mime application/x-epoc-jotter diff --git a/magic/filesystems b/magic/filesystems new file mode 100644 index 0000000000..d2178296e0 --- /dev/null +++ b/magic/filesystems @@ -0,0 +1,12 @@ +# See COPYING file in this directory for original libmagic copyright. 
+#------------------------------------------------------------------------------ +# $File: filesystems,v 1.76 2013/02/18 18:45:41 christos Exp $ +# filesystems: file(1) magic for different filesystems +# + +# CDROM Filesystems +# Modified for UDF by gerardo.cacciari@gmail.com +32769 string CD001 # +!:mime application/x-iso9660-image +37633 string CD001 ISO 9660 CD-ROM filesystem data (raw 2352 byte sectors) +!:mime application/x-iso9660-image diff --git a/magic/flash b/magic/flash new file mode 100644 index 0000000000..b64761b12d --- /dev/null +++ b/magic/flash @@ -0,0 +1,18 @@ +# See COPYING file in this directory for original libmagic copyright. +#------------------------------------------------------------------------------ +# $File: flash,v 1.8 2009/09/19 16:28:09 christos Exp $ +# flash: file(1) magic for Macromedia Flash file format +# +# See +# +# http://www.macromedia.com/software/flash/open/ +# +0 string FWS Macromedia Flash data, +>3 byte x version %d +!:mime application/x-shockwave-flash +0 string CWS Macromedia Flash data (compressed), +!:mime application/x-shockwave-flash + +# From: Cal Peake +0 string FLV Macromedia Flash Video +!:mime video/x-flv diff --git a/magic/fonts b/magic/fonts new file mode 100644 index 0000000000..8189131d15 --- /dev/null +++ b/magic/fonts @@ -0,0 +1,32 @@ +# See COPYING file in this directory for original libmagic copyright. 
+#------------------------------------------------------------------------------ +# $File: fonts,v 1.25 2013/02/06 14:18:52 christos Exp $ +# fonts: file(1) magic for font data +# + +# X11 font files in SNF (Server Natural Format) format +# updated by Joerg Jenderek at Feb 2013 +# http://computer-programming-forum.com/51-perl/8f22fb96d2e34bab.htm +0 belong 00000004 X11 SNF font data, MSB first +#>104 belong 00000004 X11 SNF font data, MSB first +!:mime application/x-font-sfn +# GRR: line below too general as it catches also Xbase index file t3-CHAR.NDX +0 lelong 00000004 +>104 lelong 00000004 X11 SNF font data, LSB first +!:mime application/x-font-sfn + +# True Type fonts +0 string \000\001\000\000\000 TrueType font data +!:mime application/x-font-ttf + +# Opentype font data from Avi Bercovich +0 string OTTO OpenType font data +!:mime application/vnd.ms-opentype + +# Gurkan Sengun , www.linuks.mine.nu +0 string SplineFontDB: Spline Font Database +!:mime application/vnd.font-fontforge-sfd + +# EOT +34 string LP Embedded OpenType (EOT) +!:mime application/vnd.ms-fontobject diff --git a/magic/fortran b/magic/fortran new file mode 100644 index 0000000000..498eeacf8a --- /dev/null +++ b/magic/fortran @@ -0,0 +1,7 @@ +# See COPYING file in this directory for original libmagic copyright. +#------------------------------------------------------------------------------ +# $File: fortran,v 1.6 2009/09/19 16:28:09 christos Exp $ +# FORTRAN source +0 regex/100 \^[Cc][\ \t] FORTRAN program +!:mime text/x-fortran +!:strength - 5 diff --git a/magic/frame b/magic/frame new file mode 100644 index 0000000000..b42943bfcd --- /dev/null +++ b/magic/frame @@ -0,0 +1,31 @@ +# See COPYING file in this directory for original libmagic copyright. 
+#------------------------------------------------------------------------------ +# $File$ +# frame: file(1) magic for FrameMaker files +# +# This stuff came on a FrameMaker demo tape, most of which is +# copyright, but this file is "published" as witness the following: +# +# Note that this is the Framemaker Maker Interchange Format, not the +# Normal format which would be application/vnd.framemaker. +# +0 string \6 string 3.0 (3.0) +#>6 string 2.0 (2.0) +#>6 string 1.0 (1.0) +0 string \ + +#------------------------------------------------------------------------------ +# XCF: file(1) magic for the XCF image format used in the GIMP developed +# by Spencer Kimball and Peter Mattis +# ('Bucky' LaDieu, nega@vt.edu) + +0 string gimp\ xcf GIMP XCF image data, +!:mime image/x-xcf diff --git a/magic/gnu b/magic/gnu new file mode 100644 index 0000000000..bf1f631751 --- /dev/null +++ b/magic/gnu @@ -0,0 +1,23 @@ +# See COPYING file in this directory for original libmagic copyright. +#------------------------------------------------------------------------------ +# $File: gnu,v 1.13 2012/01/03 17:16:54 christos Exp $ +# gnu: file(1) magic for various GNU tools +# +# GNU nlsutils message catalog file format +# +# GNU message catalog (.mo and .gmo files) + +# GnuPG +# The format is very similar to pgp +# Note: magic.mime had 0x8501 for the next line instead of 0x8502 +0 beshort 0x8502 GPG encrypted data +!:mime text/PGP # encoding: data + +# This magic is not particularly good, as the keyrings don't have true +# magic. Nevertheless, it covers many keyrings. +0 beshort 0x9901 GPG key public ring +!:mime application/x-gnupg-keyring + +# gettext message catalogue +0 regex \^msgid\ GNU gettext message catalogue text +!:mime text/x-po diff --git a/magic/gnumeric b/magic/gnumeric new file mode 100644 index 0000000000..b5edca93c1 --- /dev/null +++ b/magic/gnumeric @@ -0,0 +1,8 @@ +# See COPYING file in this directory for original libmagic copyright. 
+#------------------------------------------------------------------------------ +# $File$ +# gnumeric: file(1) magic for Gnumeric spreadsheet +# This entry is only semi-helpful, as Gnumeric compresses its files, so +# they will ordinarily be reported as "compressed", but at least -z helps +39 string =4 belong x \b, FORM is %d bytes long +# audio formats +>8 string AIFF \b, AIFF audio +!:mime audio/x-aiff +>8 string AIFC \b, AIFF-C compressed audio +!:mime audio/x-aiff +>8 string 8SVX \b, 8SVX 8-bit sampled sound voice +!:mime audio/x-aiff diff --git a/magic/images b/magic/images new file mode 100644 index 0000000000..281aba4706 --- /dev/null +++ b/magic/images @@ -0,0 +1,255 @@ +# See COPYING file in this directory for original libmagic copyright. +#------------------------------------------------------------------------------ +# $File: images,v 1.80 2013/02/06 14:18:52 christos Exp $ +# images: file(1) magic for image formats (see also "iff", and "c-lang" for +# XPM bitmaps) +# +# originally from jef@helios.ee.lbl.gov (Jef Poskanzer), +# additions by janl@ifi.uio.no as well as others. Jan also suggested +# merging several one- and two-line files into here. +# +# little magic: PCX (first byte is 0x0a) + +# PBMPLUS images +# The next byte following the magic is always whitespace. 
+# strength is changed to try these patterns before "x86 boot sector" +0 search/1 P1 +>3 regex =[0-9]*\ [0-9]* Netpbm PBM image text +>3 regex =[0-9]+\ \b, size = %sx +>>3 regex =\ [0-9]+ \b%s +!:strength + 45 +!:mime image/x-portable-bitmap +0 search/1 P2 +>3 regex =[0-9]*\ [0-9]* Netpbm PGM image text +>3 regex =[0-9]+\ \b, size = %sx +>>3 regex =\ [0-9]+ \b%s +!:strength + 45 +!:mime image/x-portable-greymap +0 search/1 P3 Netpbm PPM image text +>3 regex =[0-9]*\ [0-9]* Netpbm PPM image text +>3 regex =[0-9]+\ \b, size = %sx +>>3 regex =\ [0-9]+ \b%s +!:strength + 45 +!:mime image/x-portable-pixmap +0 string P4 +>3 regex =[0-9]*\ [0-9]* Netpbm PBM "rawbits" image data +>3 regex =[0-9]+\ \b, size = %sx +>>3 regex =\ [0-9]+ \b%s +!:strength + 45 +!:mime image/x-portable-bitmap +0 string P5 +>3 regex =[0-9]*\ [0-9]* Netpbm PGM "rawbits" image data +>3 regex =[0-9]+\ \b, size = %sx +>>3 regex =\ [0-9]+ \b%s +!:strength + 45 +!:mime image/x-portable-greymap +0 string P6 +>3 regex =[0-9]*\ [0-9]* Netpbm PPM "rawbits" image data +>3 regex =[0-9]+\ \b, size = %sx +>>3 regex =\ [0-9]+ \b%s +!:strength + 45 +!:mime image/x-portable-pixmap +0 string P7 Netpbm PAM image file +!:mime image/x-portable-pixmap + +# NIFF (Navy Interchange File Format, a modification of TIFF) images +# [GRR: this *must* go before TIFF] +0 string IIN1 NIFF image data +!:mime image/x-niff + +# Canon RAW version 1 (CRW) files are a type of Canon Image File Format +# (CIFF) file. These are apparently all little-endian. +# From: Adam Buchbinder +# URL: http://www.sno.phy.queensu.ca/~phil/exiftool/canon_raw.html +0 string II\x1a\0\0\0HEAPCCDR Canon CIFF raw image data +!:mime image/x-canon-crw + +# Canon RAW version 2 (CR2) files are a kind of TIFF with an extra magic +# number. Put this above the TIFF test to make sure we detect them. +# These are apparently all little-endian. 
+# From: Adam Buchbinder +# URL: http://libopenraw.freedesktop.org/wiki/Canon_CR2 +0 string II\x2a\0\x10\0\0\0CR Canon CR2 raw image data +!:mime image/x-canon-cr2 + +# Tag Image File Format, from Daniel Quinlan (quinlan@yggdrasil.com) +# The second word of TIFF files is the TIFF version number, 42, which has +# never changed. The TIFF specification recommends testing for it. +0 string MM\x00\x2a TIFF image data, big-endian +!:mime image/tiff +0 string II\x2a\x00 TIFF image data, little-endian +!:mime image/tiff + +0 string MM\x00\x2b Big TIFF image data, big-endian +!:mime image/tiff +0 string II\x2b\x00 Big TIFF image data, little-endian +!:mime image/tiff + +# PNG [Portable Network Graphics, or "PNG's Not GIF"] images +# (Greg Roelofs, newt@uchicago.edu) +# (Albert Cahalan, acahalan@cs.uml.edu) +# +# 137 P N G \r \n ^Z \n [4-byte length] H E A D [HEAD data] [HEAD crc] ... +# +0 string \x89PNG\x0d\x0a\x1a\x0a PNG image data +!:mime image/png + +# possible GIF replacements; none yet released! +# (Greg Roelofs, newt@uchicago.edu) +# +# GRR 950115: this was mine ("Zip GIF"): +0 string GIF94z ZIF image (GIF+deflate alpha) +!:mime image/x-unknown +# +# GRR 950115: this is Jeremy Wohl's Free Graphics Format (better): +# +0 string FGF95a FGF image (GIF+deflate beta) +!:mime image/x-unknown +# +# GRR 950115: this is Thomas Boutell's Portable Bitmap Format proposal +# (best; not yet implemented): +# +0 string PBF PBF image (deflate compression) +!:mime image/x-unknown + +# GIF +0 string GIF8 GIF image data +!:mime image/gif +!:apple 8BIMGIFf + +# From: Joerg Jenderek +# most files with the extension .EPA and some with .BMP +0 string \x11\x06 Award BIOS Logo, 136 x 84 +!:mime image/x-award-bioslogo +0 string \x11\x09 Award BIOS Logo, 136 x 126 +!:mime image/x-award-bioslogo +#0 string \x07\x1f BIOS Logo corrupted? 
+# http://www.blackfiveservices.co.uk/awbmtools.shtml +# http://biosgfx.narod.ru/v3/ +# http://biosgfx.narod.ru/abr-2/ +0 string AWBM +>4 leshort <1981 Award BIOS bitmap +!:mime image/x-award-bmp + +# PC bitmaps (OS/2, Windows BMP files) (Greg Roelofs, newt@uchicago.edu) +0 string BM +>14 leshort 12 PC bitmap, OS/2 1.x format +!:mime image/x-ms-bmp +>14 leshort 64 PC bitmap, OS/2 2.x format +!:mime image/x-ms-bmp +>14 leshort 40 PC bitmap, Windows 3.x format +!:mime image/x-ms-bmp +>14 leshort 128 PC bitmap, Windows NT/2000 format +!:mime image/x-ms-bmp + +# XPM icons (Greg Roelofs, newt@uchicago.edu) +0 search/1 /*\ XPM\ */ X pixmap image text +!:mime image/x-xpmi + +# DICOM medical imaging data +128 string DICM DICOM medical imaging data +!:mime application/dicom + +# XWD - X Window Dump file. +# As described in /usr/X11R6/include/X11/XWDFile.h +# used by the xwd program. +# Bradford Castalia, idaeim, 1/01 +# updated by Adam Buchbinder, 2/09 +# The following assumes version 7 of the format; the first long is the length +# of the header, which is at least 25 4-byte longs, and the one at offset 8 +# is a constant which is always either 1 or 2. Offset 12 is the pixmap depth, +# which is a maximum of 32. 
+0 belong >100 +>8 belong <3 +>>12 belong <33 +>>>4 belong 7 XWD X Window Dump image data +!:mime image/x-xwindowdump + +# PCX image files +# From: Dan Fandrich +# updated by Joerg Jenderek at Feb 2013 by http://de.wikipedia.org/wiki/PCX +# http://web.archive.org/web/20100206055706/http://www.qzx.com/pc-gpe/pcx.txt +# GRR: original test was still too general as it catches xbase examples T5.DBT,T6.DBT with 0xa000000 +# test for bytes 0x0a,version byte (0,2,3,4,5),compression byte flag(0,1), bit depth (>0) of PCX or T5.DBT,T6.DBT +0 ubelong&0xffF8fe00 0x0a000000 +# for PCX bit depth > 0 +>3 ubyte >0 +# test for valid versions +>>1 ubyte <6 +>>>1 ubyte !1 PCX +!:mime image/x-pcx + +# Adobe Photoshop +# From: Asbjoern Sloth Toennesen +0 string 8BPS Adobe Photoshop Image +!:mime image/vnd.adobe.photoshop + +# Summary: DjVu image / document +# Extension: .djvu +# Reference: http://djvu.org/docs/DjVu3Spec.djvu +# Submitted by: Stephane Loeuillet +# Modified by (1): Abel Cheung +0 string AT&TFORM +>12 string DJVM DjVu multiple page document +!:mime image/vnd.djvu +>12 string DJVU DjVu image or single page document +!:mime image/vnd.djvu +>12 string DJVI DjVu shared document +!:mime image/vnd.djvu +>12 string THUM DjVu page thumbnails +!:mime image/vnd.djvu + +# Originally by Marc Espie +# Modified by Robert Minsk +# http://www.openexr.com/openexrfilelayout.pdf +0 lelong 20000630 OpenEXR image data, +!:mime image/x-exr + +# SMPTE Digital Picture Exchange Format, SMPTE DPX +# +# ANSI/SMPTE 268M-1994, SMPTE Standard for File Format for Digital +# Moving-Picture Exchange (DPX), v1.0, 18 February 1994 +# Robert Minsk +0 string SDPX DPX image data, big-endian, +!:mime image/x-dpx + +#----------------------------------------------------------------------- +# Hierarchical Data Format, used to facilitate scientific data exchange +# specifications at http://hdf.ncsa.uiuc.edu/ +0 belong 0x0e031301 Hierarchical Data Format (version 4) data +!:mime application/x-hdf +0 string 
\211HDF\r\n\032\n Hierarchical Data Format (version 5) data +!:mime application/x-hdf + +# http://www.cartesianinc.com/Tech/ +0 string CPC\262 Cartesian Perceptual Compression image +!:mime image/x-cpi + + +# Polar Monitor Bitmap (.pmb) used as logo for Polar Electro watches +# From: Markus Heidelberg +0 string/t [BitmapInfo2] Polar Monitor Bitmap text +!:mime image/x-polar-monitor-bitmap + +# Type: Olympus ORF raw images. +# URL: http://libopenraw.freedesktop.org/wiki/Olympus_ORF +# From: Adam Buchbinder +0 string MMOR Olympus ORF raw image data, big-endian +!:mime image/x-olympus-orf +0 string IIRO Olympus ORF raw image data, little-endian +!:mime image/x-olympus-orf +0 string IIRS Olympus ORF raw image data, little-endian +!:mime image/x-olympus-orf + +# Type: Foveon X3F +# URL: http://www.photofo.com/downloads/x3f-raw-format.pdf +# From: Adam Buchbinder +# Note that the MIME type isn't defined anywhere that I can find; if +# there's a canonical type for this format, it should replace this one. +0 string FOVb Foveon X3F raw image data +!:mime image/x-x3f + +# Paint.NET file +# From Adam Buchbinder +0 string PDN3 Paint.NET image data +!:mime image/x-paintnet diff --git a/magic/java b/magic/java new file mode 100644 index 0000000000..481ffec160 --- /dev/null +++ b/magic/java @@ -0,0 +1,16 @@ +# See COPYING file in this directory for original libmagic copyright. +#------------------------------------------------------------ +# $File: java,v 1.13 2011/12/08 12:12:46 rrt Exp $ +# Java ByteCode and Mach-O binaries (e.g., Mac OS X) use the +# same magic number, 0xcafebabe, so they are both handled +# in the entry called "cafebabe". 
+#------------------------------------------------------------ + +0 belong 0xfeedfeed Java KeyStore +!:mime application/x-java-keystore +0 belong 0xcececece Java JCE KeyStore +!:mime application/x-java-jce-keystore + +# Java source +0 regex ^import.*;$ Java source +!:mime text/x-java diff --git a/magic/javascript b/magic/javascript new file mode 100644 index 0000000000..a1311d0e71 --- /dev/null +++ b/magic/javascript @@ -0,0 +1,17 @@ +# See COPYING file in this directory for original libmagic copyright. +#------------------------------------------------------------------------------ +# $File: $ +# javascript: magic for javascript and node.js scripts. +# +0 search/1/w #!/bin/node Node.js script text executable +!:mime application/javascript +0 search/1/w #!/usr/bin/node Node.js script text executable +!:mime application/javascript +0 search/1/w #!/bin/nodejs Node.js script text executable +!:mime application/javascript +0 search/1/w #!/usr/bin/nodejs Node.js script text executable +!:mime application/javascript +0 search/1 #!/usr/bin/env\ node Node.js script text executable +!:mime application/javascript +0 search/1 #!/usr/bin/env\ nodejs Node.js script text executable +!:mime application/javascript diff --git a/magic/jpeg b/magic/jpeg new file mode 100644 index 0000000000..55fedae4b4 --- /dev/null +++ b/magic/jpeg @@ -0,0 +1,31 @@ +# See COPYING file in this directory for original libmagic copyright. +#------------------------------------------------------------------------------ +# $File: jpeg,v 1.18 2012/08/01 12:12:36 christos Exp $ +# JPEG images +# SunOS 5.5.1 had +# +# 0 string \377\330\377\340 JPEG file +# 0 string \377\330\377\356 JPG file +# +# both of which turn into "JPEG image data" here. 
+# +0 beshort 0xffd8 JPEG image data +!:mime image/jpeg +!:apple 8BIMJPEG +!:strength +2 + +# From: David Santinoli +0 string \x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A JPEG 2000 +# From: Johan van der Knijff +# Added sub-entries for JP2, JPX, JPM and MJ2 formats; added mimetypes +# https://github.com/bitsgalore/jp2kMagic +# +# Now read value of 'Brand' field, which yields a few possibilities: +>20 string \x6a\x70\x32\x20 Part 1 (JP2) +!:mime image/jp2 +>20 string \x6a\x70\x78\x20 Part 2 (JPX) +!:mime image/jpx +>20 string \x6a\x70\x6d\x20 Part 6 (JPM) +!:mime image/jpm +>20 string \x6d\x6a\x70\x32 Part 3 (MJ2) +!:mime video/mj2 diff --git a/magic/kde b/magic/kde new file mode 100644 index 0000000000..2b66ee611d --- /dev/null +++ b/magic/kde @@ -0,0 +1,11 @@ +# See COPYING file in this directory for original libmagic copyright. +#------------------------------------------------------------------------------ +# $File: kde,v 1.4 2009/09/19 16:28:10 christos Exp $ +# kde: file(1) magic for KDE + +0 string/t [KDE\ Desktop\ Entry] KDE desktop entry +!:mime application/x-kdelnk +0 string/t #\ KDE\ Config\ File KDE config file +!:mime application/x-kdelnk +0 string/t #\ xmcd xmcd database file for kscd +!:mime text/x-xmcd diff --git a/magic/kml b/magic/kml new file mode 100644 index 0000000000..608ff0e1b0 --- /dev/null +++ b/magic/kml @@ -0,0 +1,30 @@ +# See COPYING file in this directory for original libmagic copyright. +#------------------------------------------------------------------------------ +# $File: kml,v 1.2 2009/09/19 16:28:10 christos Exp $ +# Type: Google KML, formerly Keyhole Markup Language +# Future development of this format has been handed +# over to the Open Geospatial Consortium. 
+# http://www.opengeospatial.org/standards/kml/ +# From: Asbjoern Sloth Toennesen +0 string/t \20 search/400 \ xmlns= +>>&0 regex ['"]http://earth.google.com/kml Google KML document +!:mime application/vnd.google-earth.kml+xml + +#------------------------------------------------------------------------------ +# Type: OpenGIS KML, formerly Keyhole Markup Language +# This standard is maintained by the +# Open Geospatial Consortium. +# http://www.opengeospatial.org/standards/kml/ +# From: Asbjoern Sloth Toennesen +>>&0 regex ['"]http://www.opengis.net/kml OpenGIS KML document +!:mime application/vnd.google-earth.kml+xml + +#------------------------------------------------------------------------------ +# Type: Google KML Archive (ZIP based) +# http://code.google.com/apis/kml/documentation/kml_tut.html +# From: Asbjoern Sloth Toennesen +0 string PK\003\004 +>4 byte 0x14 +>>30 string doc.kml Compressed Google KML Document, including resources. +!:mime application/vnd.google-earth.kmz diff --git a/magic/linux b/magic/linux new file mode 100644 index 0000000000..4a5c935760 --- /dev/null +++ b/magic/linux @@ -0,0 +1,22 @@ +# See COPYING file in this directory for original libmagic copyright. +#------------------------------------------------------------------------------ +# $File: linux,v 1.46 2013/01/06 21:26:48 christos Exp $ +# linux: file(1) magic for Linux files +# +# Values for Linux/i386 binaries, from Daniel Quinlan +# The following basic Linux magic is useful for reference, but using +# "long" magic is a better practice in order to avoid collisions. 
+# +# 2 leshort 100 Linux/i386 +# >0 leshort 0407 impure executable (OMAGIC) +# >0 leshort 0410 pure executable (NMAGIC) +# >0 leshort 0413 demand-paged executable (ZMAGIC) +# >0 leshort 0314 demand-paged executable (QMAGIC) +# + +# SYSLINUX boot logo files (from 'ppmtolss16' sources) +# http://www.syslinux.org/wiki/index.php/SYSLINUX#Display_graphic_from_filename: +# file extension .lss .16 +0 lelong =0x1413f33d SYSLINUX' LSS16 image data +# syslinux-4.05/mime/image/x-lss16.xml +!:mime image/x-lss16 diff --git a/magic/lisp b/magic/lisp new file mode 100644 index 0000000000..f5a06c8964 --- /dev/null +++ b/magic/lisp @@ -0,0 +1,42 @@ +# See COPYING file in this directory for original libmagic copyright. +#------------------------------------------------------------------------------ +# $File$ +# lisp: file(1) magic for lisp programs +# +# various lisp types, from Daniel Quinlan (quinlan@yggdrasil.com) + +# updated by Joerg Jenderek +# GRR: This lot is too weak +#0 string ;; +# windows INF files often begin with semicolon and use CRLF as line end +# lisp files are mainly created on unix system with LF as line end +#>2 search/4096 !\r Lisp/Scheme program text +#>2 search/4096 \r Windows INF file + +0 search/4096 (setq\ Lisp/Scheme program text +!:mime text/x-lisp +0 search/4096 (defvar\ Lisp/Scheme program text +!:mime text/x-lisp +0 search/4096 (defparam\ Lisp/Scheme program text +!:mime text/x-lisp +0 search/4096 (defun\ Lisp/Scheme program text +!:mime text/x-lisp +0 search/4096 (autoload\ Lisp/Scheme program text +!:mime text/x-lisp +0 search/4096 (custom-set-variables\ Lisp/Scheme program text +!:mime text/x-lisp + +# Emacs 18 - this is always correct, but not very magical. +0 string \012( Emacs v18 byte-compiled Lisp data +!:mime application/x-elc +# Emacs 19+ - ver. 
recognition added by Ian Springer +# Also applies to XEmacs 19+ .elc files; could tell them apart with regexs +# - Chris Chittleborough +0 string ;ELC +>4 byte >18 +>4 byte <32 Emacs/XEmacs v%d byte-compiled Lisp data +!:mime application/x-elc + +# From: David Allouche +0 search/1 \, Seo Sanghyeon + +# Lua scripts +0 search/1/w #!\ /usr/bin/lua Lua script text executable +!:mime text/x-lua +0 search/1/w #!\ /usr/local/bin/lua Lua script text executable +!:mime text/x-lua +0 search/1 #!/usr/bin/env\ lua Lua script text executable +!:mime text/x-lua +0 search/1 #!\ /usr/bin/env\ lua Lua script text executable +!:mime text/x-lua + diff --git a/magic/m4 b/magic/m4 new file mode 100644 index 0000000000..7262fca81b --- /dev/null +++ b/magic/m4 @@ -0,0 +1,7 @@ +# See COPYING file in this directory for original libmagic copyright. +#------------------------------------------------------------------------------ +# $File$ +# make: file(1) magic for M4 scripts +# +0 regex \^dnl\ M4 macro processor script text +!:mime text/x-m4 diff --git a/magic/macintosh b/magic/macintosh new file mode 100644 index 0000000000..6398fc2ff2 --- /dev/null +++ b/magic/macintosh @@ -0,0 +1,21 @@ +# See COPYING file in this directory for original libmagic copyright. +#------------------------------------------------------------------------------ +# $File: macintosh,v 1.21 2010/09/20 19:19:17 rrt Exp $ +# macintosh description +# +# BinHex is the Macintosh ASCII-encoded file format (see also "apple") +# Daniel Quinlan, quinlan@yggdrasil.com +11 string must\ be\ converted\ with\ BinHex BinHex binary text +!:mime application/mac-binhex40 + +# Stuffit archives are the de facto standard of compression for Macintosh +# files obtained from most archives. (franklsm@tuns.ca) +0 string SIT! StuffIt Archive (data) +!:mime application/x-stuffit +!:apple SIT!SIT! + +# Newer StuffIt archives (grant@netbsd.org) +0 string StuffIt StuffIt Archive +!:mime application/x-stuffit +!:apple SIT!SIT! 
+#>162 string >0 : %s diff --git a/magic/mail.news b/magic/mail.news new file mode 100644 index 0000000000..c1a446d4ca --- /dev/null +++ b/magic/mail.news @@ -0,0 +1,35 @@ +# See COPYING file in this directory for original libmagic copyright. +#------------------------------------------------------------------------------ +# $File: mail.news,v 1.21 2012/06/21 01:44:52 christos Exp $ +# mail.news: file(1) magic for mail and news +# +# Unfortunately, saved netnews also has From line added in some news software. +#0 string From mail text +0 string/t Relay-Version: old news text +!:mime message/rfc822 +0 string/t #!\ rnews batched news text +!:mime message/rfc822 +0 string/t N#!\ rnews mailed, batched news text +!:mime message/rfc822 +0 string/t Forward\ to mail forwarding text +!:mime message/rfc822 +0 string/t Pipe\ to mail piping text +!:mime message/rfc822 +0 string/tc delivered-to: SMTP mail text +!:mime message/rfc822 +0 string/tc return-path: SMTP mail text +!:mime message/rfc822 +0 string/t Path: news text +!:mime message/news +0 string/t Xref: news text +!:mime message/news +0 string/t From: news or mail text +!:mime message/rfc822 +0 string/t Article saved news text +!:mime message/news +0 string/t Received: RFC 822 mail text +!:mime message/rfc822 + +# TNEF files... +0 lelong 0x223E9F78 Transport Neutral Encapsulation Format +!:mime application/vnd.ms-tnef diff --git a/magic/make b/magic/make new file mode 100644 index 0000000000..83d6a012dd --- /dev/null +++ b/magic/make @@ -0,0 +1,16 @@ +# See COPYING file in this directory for original libmagic copyright. 
+#------------------------------------------------------------------------------ +# $File$ +# make: file(1) magic for makefiles +# +0 regex \^CFLAGS makefile script text +!:mime text/x-makefile +0 regex \^LDFLAGS makefile script text +!:mime text/x-makefile +0 regex \^all: makefile script text +!:mime text/x-makefile +0 regex \^.PRECIOUS makefile script text +!:mime text/x-makefile + +0 regex \^SUBDIRS automake makefile script text +!:mime text/x-makefile diff --git a/magic/marc21 b/magic/marc21 new file mode 100644 index 0000000000..26899d2e70 --- /dev/null +++ b/magic/marc21 @@ -0,0 +1,29 @@ +# See COPYING file in this directory for original libmagic copyright. +#-------------------------------------------- +# marc21: file(1) magic for MARC 21 Format +# +# Kevin Ford (kefo@loc.gov) +# +# MARC21 formats are for the representation and communication +# of bibliographic and related information in machine-readable +# form. For more info, see http://www.loc.gov/marc/ + + +# leader position 20-21 must be 45 +20 string 45 + +# leader starts with 5 digits, followed by codes specific to MARC format +>0 regex/1 (^[0-9]{5})[acdnp][^bhlnqsu-z] MARC21 Bibliographic +!:mime application/marc +>0 regex/1 (^[0-9]{5})[acdnosx][z] MARC21 Authority +!:mime application/marc +>0 regex/1 (^[0-9]{5})[cdn][uvxy] MARC21 Holdings +!:mime application/marc +0 regex/1 (^[0-9]{5})[acdn][w] MARC21 Classification +!:mime application/marc +>0 regex/1 (^[0-9]{5})[cdn][q] MARC21 Community +!:mime application/marc + +# leader position 22-23, should be "00" but is it? +>0 regex/1 (^.{21})([^0]{2}) (non-conforming) +!:mime application/marc diff --git a/magic/matroska b/magic/matroska new file mode 100644 index 0000000000..c1791413cb --- /dev/null +++ b/magic/matroska @@ -0,0 +1,17 @@ +# See COPYING file in this directory for original libmagic copyright. 
+#------------------------------------------------------------------------------ +# $File: matroska,v 1.7 2012/08/26 10:06:15 christos Exp $ +# matroska: file(1) magic for Matroska files +# +# See http://www.matroska.org/ +# + +# EBML id: +0 belong 0x1a45dfa3 +# DocType id: +>4 search/4096 \x42\x82 +# DocType contents: +>>&1 string webm WebM +!:mime video/webm +>>&1 string matroska Matroska data +!:mime video/x-matroska diff --git a/magic/misctools b/magic/misctools new file mode 100644 index 0000000000..35fddaa61a --- /dev/null +++ b/magic/misctools @@ -0,0 +1,9 @@ +# See COPYING file in this directory for original libmagic copyright. +#----------------------------------------------------------------------------- +# $File: misctools,v 1.12 2010/09/29 18:36:49 rrt Exp $ +# misctools: file(1) magic for miscellaneous UNIX tools. +# +0 string/c BEGIN:VCALENDAR vCalendar calendar file +!:mime text/calendar +0 string/c BEGIN:VCARD vCard visiting card +!:mime text/x-vcard diff --git a/magic/msdos b/magic/msdos new file mode 100644 index 0000000000..6182801f20 --- /dev/null +++ b/magic/msdos @@ -0,0 +1,368 @@ +# See COPYING file in this directory for original libmagic copyright. +#------------------------------------------------------------------------------ +# $File: msdos,v 1.84 2013/02/05 13:55:22 christos Exp $ +# msdos: file(1) magic for MS-DOS files +# + +# .BAT files (Daniel Quinlan, quinlan@yggdrasil.com) +# updated by Joerg Jenderek at Oct 2008,Apr 2011 +0 string/t @ +>1 string/cW \ echo\ off DOS batch file text +!:mime text/x-msdos-batch +>1 string/cW echo\ off DOS batch file text +!:mime text/x-msdos-batch +>1 string/cW rem DOS batch file text +!:mime text/x-msdos-batch +>1 string/cW set\ DOS batch file text +!:mime text/x-msdos-batch + +# Tests for various EXE types. +# +# Many of the compressed formats were extracted from IDARC 1.23 source code.
+# +0 string/b MZ +!:mime application/x-dosexec +# All non-DOS EXE extensions have the relocation table more than 0x40 bytes into the file. +>0x18 leshort <0x40 MS-DOS executable +# These traditional tests usually work but not always. When test quality support is +# implemented these can be turned on. +#>>0x18 leshort 0x1c (Borland compiler) +#>>0x18 leshort 0x1e (MS compiler) + +# If the relocation table is 0x40 or more bytes into the file, it's definitely +# not a DOS EXE. +>0x18 leshort >0x3f + +# Maybe it's a PE? +>>(0x3c.l) string PE\0\0 PE +>>>(0x3c.l+24) leshort 0x010b \b32 executable +>>>(0x3c.l+24) leshort 0x020b \b32+ executable +>>>(0x3c.l+24) leshort 0x0107 ROM image +>>>(0x3c.l+24) default x Unknown PE signature +>>>>&0 leshort x 0x%x +>>>(0x3c.l+22) leshort&0x2000 >0 (DLL) +>>>(0x3c.l+92) leshort 1 (native) +>>>(0x3c.l+92) leshort 2 (GUI) +>>>(0x3c.l+92) leshort 3 (console) +>>>(0x3c.l+92) leshort 7 (POSIX) +>>>(0x3c.l+92) leshort 9 (Windows CE) +>>>(0x3c.l+92) leshort 10 (EFI application) +>>>(0x3c.l+92) leshort 11 (EFI boot service driver) +>>>(0x3c.l+92) leshort 12 (EFI runtime driver) +>>>(0x3c.l+92) leshort 13 (EFI ROM) +>>>(0x3c.l+92) leshort 14 (XBOX) +>>>(0x3c.l+92) leshort 15 (Windows boot application) +>>>(0x3c.l+92) default x (Unknown subsystem +>>>>&0 leshort x 0x%x) +>>>(0x3c.l+4) leshort 0x14c Intel 80386 +>>>(0x3c.l+4) leshort 0x166 MIPS R4000 +>>>(0x3c.l+4) leshort 0x168 MIPS R10000 +>>>(0x3c.l+4) leshort 0x184 Alpha +>>>(0x3c.l+4) leshort 0x1a2 Hitachi SH3 +>>>(0x3c.l+4) leshort 0x1a6 Hitachi SH4 +>>>(0x3c.l+4) leshort 0x1c0 ARM +>>>(0x3c.l+4) leshort 0x1c2 ARM Thumb +>>>(0x3c.l+4) leshort 0x1c4 ARMv7 Thumb +>>>(0x3c.l+4) leshort 0x1f0 PowerPC +>>>(0x3c.l+4) leshort 0x200 Intel Itanium +>>>(0x3c.l+4) leshort 0x266 MIPS16 +>>>(0x3c.l+4) leshort 0x268 Motorola 68000 +>>>(0x3c.l+4) leshort 0x290 PA-RISC +>>>(0x3c.l+4) leshort 0x366 MIPSIV +>>>(0x3c.l+4) leshort 0x466 MIPS16 with FPU +>>>(0x3c.l+4) leshort 0xebc EFI byte code 
+>>>(0x3c.l+4) leshort 0x8664 x86-64 +>>>(0x3c.l+4) leshort 0xc0ee MSIL +>>>(0x3c.l+4) default x Unknown processor type +>>>>&0 leshort x 0x%x +>>>(0x3c.l+22) leshort&0x0200 >0 (stripped to external PDB) +>>>(0x3c.l+22) leshort&0x1000 >0 system file +>>>(0x3c.l+24) leshort 0x010b +>>>>(0x3c.l+232) lelong >0 Mono/.Net assembly +>>>(0x3c.l+24) leshort 0x020b +>>>>(0x3c.l+248) lelong >0 Mono/.Net assembly + +# hooray, there's a DOS extender using the PE format, with a valid PE +# executable inside (which just prints a message and exits if run in win) +>>>(8.s*16) string 32STUB \b, 32rtm DOS extender +>>>(8.s*16) string !32STUB \b, for MS Windows +>>>(0x3c.l+0xf8) string UPX0 \b, UPX compressed +>>>(0x3c.l+0xf8) search/0x140 PEC2 \b, PECompact2 compressed +>>>(0x3c.l+0xf8) search/0x140 UPX2 +>>>>(&0x10.l+(-4)) string PK\3\4 \b, ZIP self-extracting archive (Info-Zip) +>>>(0x3c.l+0xf8) search/0x140 .idata +>>>>(&0xe.l+(-4)) string PK\3\4 \b, ZIP self-extracting archive (Info-Zip) +>>>>(&0xe.l+(-4)) string ZZ0 \b, ZZip self-extracting archive +>>>>(&0xe.l+(-4)) string ZZ1 \b, ZZip self-extracting archive +>>>(0x3c.l+0xf8) search/0x140 .rsrc +>>>>(&0x0f.l+(-4)) string a\\\4\5 \b, WinHKI self-extracting archive +>>>>(&0x0f.l+(-4)) string Rar! \b, RAR self-extracting archive +>>>>(&0x0f.l+(-4)) search/0x3000 MSCF \b, InstallShield self-extracting archive +>>>>(&0x0f.l+(-4)) search/32 Nullsoft \b, Nullsoft Installer self-extracting archive +>>>(0x3c.l+0xf8) search/0x140 .data +>>>>(&0x0f.l) string WEXTRACT \b, MS CAB-Installer self-extracting archive +>>>(0x3c.l+0xf8) search/0x140 .petite\0 \b, Petite compressed +>>>>(0x3c.l+0xf7) byte x +>>>>>(&0x104.l+(-4)) string =!sfx! 
\b, ACE self-extracting archive +>>>(0x3c.l+0xf8) search/0x140 .WISE \b, WISE installer self-extracting archive +>>>(0x3c.l+0xf8) search/0x140 .dz\0\0\0 \b, Dzip self-extracting archive +>>>&(0x3c.l+0xf8) search/0x100 _winzip_ \b, ZIP self-extracting archive (WinZip) +>>>&(0x3c.l+0xf8) search/0x100 SharedD \b, Microsoft Installer self-extracting archive +>>>0x30 string Inno \b, InnoSetup self-extracting archive + +# Hmm, not a PE but the relocation table is too high for a traditional DOS exe, +# must be one of the unusual subformats. +>>(0x3c.l) string !PE\0\0 MS-DOS executable + +>>(0x3c.l) string NE \b, NE +>>>(0x3c.l+0x36) byte 1 for OS/2 1.x +>>>(0x3c.l+0x36) byte 2 for MS Windows 3.x +>>>(0x3c.l+0x36) byte 3 for MS-DOS +>>>(0x3c.l+0x36) byte 4 for Windows 386 +>>>(0x3c.l+0x36) byte 5 for Borland Operating System Services +>>>(0x3c.l+0x36) default x +>>>>(0x3c.l+0x36) byte x (unknown OS %x) +>>>(0x3c.l+0x36) byte 0x81 for MS-DOS, Phar Lap DOS extender +>>>(0x3c.l+0x0c) leshort&0x8003 0x8002 (DLL) +>>>(0x3c.l+0x0c) leshort&0x8003 0x8001 (driver) +>>>&(&0x24.s-1) string ARJSFX \b, ARJ self-extracting archive +>>>(0x3c.l+0x70) search/0x80 WinZip(R)\ Self-Extractor \b, ZIP self-extracting archive (WinZip) + +>>(0x3c.l) string LX\0\0 \b, LX +>>>(0x3c.l+0x0a) leshort <1 (unknown OS) +>>>(0x3c.l+0x0a) leshort 1 for OS/2 +>>>(0x3c.l+0x0a) leshort 2 for MS Windows +>>>(0x3c.l+0x0a) leshort 3 for DOS +>>>(0x3c.l+0x0a) leshort >3 (unknown OS) +>>>(0x3c.l+0x10) lelong&0x28000 =0x8000 (DLL) +>>>(0x3c.l+0x10) lelong&0x20000 >0 (device driver) +>>>(0x3c.l+0x10) lelong&0x300 0x300 (GUI) +>>>(0x3c.l+0x10) lelong&0x28300 <0x300 (console) +>>>(0x3c.l+0x08) leshort 1 i80286 +>>>(0x3c.l+0x08) leshort 2 i80386 +>>>(0x3c.l+0x08) leshort 3 i80486 +>>>(8.s*16) string emx \b, emx +>>>>&1 string x %s +>>>&(&0x54.l-3) string arjsfx \b, ARJ self-extracting archive + +# MS Windows system file, supposedly a collection of LE executables +>>(0x3c.l) string W3 \b, W3 for MS Windows + 
+>>(0x3c.l) string LE\0\0 \b, LE executable +>>>(0x3c.l+0x0a) leshort 1 +# some DOS extenders use LE files with OS/2 header +>>>>0x240 search/0x100 DOS/4G for MS-DOS, DOS4GW DOS extender +>>>>0x240 search/0x200 WATCOM\ C/C++ for MS-DOS, DOS4GW DOS extender +>>>>0x440 search/0x100 CauseWay\ DOS\ Extender for MS-DOS, CauseWay DOS extender +>>>>0x40 search/0x40 PMODE/W for MS-DOS, PMODE/W DOS extender +>>>>0x40 search/0x40 STUB/32A for MS-DOS, DOS/32A DOS extender (stub) +>>>>0x40 search/0x80 STUB/32C for MS-DOS, DOS/32A DOS extender (configurable stub) +>>>>0x40 search/0x80 DOS/32A for MS-DOS, DOS/32A DOS extender (embedded) +# this is a wild guess; hopefully it is a specific signature +>>>>&0x24 lelong <0x50 +>>>>>(&0x4c.l) string \xfc\xb8WATCOM +>>>>>>&0 search/8 3\xdbf\xb9 \b, 32Lite compressed +# another wild guess: if real OS/2 LE executables exist, they probably have higher start EIP +#>>>>(0x3c.l+0x1c) lelong >0x10000 for OS/2 +# fails with DOS-Extenders. +>>>(0x3c.l+0x0a) leshort 2 for MS Windows +>>>(0x3c.l+0x0a) leshort 3 for DOS +>>>(0x3c.l+0x0a) leshort 4 for MS Windows (VxD) +>>>(&0x7c.l+0x26) string UPX \b, UPX compressed +>>>&(&0x54.l-3) string UNACE \b, ACE self-extracting archive + +# looks like ASCII, probably some embedded copyright message. 
+# and definitely not NE/LE/LX/PE +>>0x3c lelong >0x20000000 +>>>(4.s*512) leshort !0x014c \b, MZ for MS-DOS +# header data too small for extended executable +>2 long !0 +>>0x18 leshort <0x40 +>>>(4.s*512) leshort !0x014c + +>>>>&(2.s-514) string !LE +>>>>>&-2 string !BW \b, MZ for MS-DOS +>>>>&(2.s-514) string LE \b, LE +>>>>>0x240 search/0x100 DOS/4G for MS-DOS, DOS4GW DOS extender +# educated guess since indirection is still not capable enough for complex offset +# calculations (next embedded executable would be at &(&2*512+&0-2) +# I suspect there are only LE executables in these multi-exe files +>>>>&(2.s-514) string BW +>>>>>0x240 search/0x100 DOS/4G ,\b LE for MS-DOS, DOS4GW DOS extender (embedded) +>>>>>0x240 search/0x100 !DOS/4G ,\b BW collection for MS-DOS + +# This sequence skips to the first COFF segment, usually .text +>(4.s*512) leshort 0x014c \b, COFF +>>(8.s*16) string go32stub for MS-DOS, DJGPP go32 DOS extender +>>(8.s*16) string emx +>>>&1 string x for DOS, Win or OS/2, emx %s +>>&(&0x42.l-3) byte x +>>>&0x26 string UPX \b, UPX compressed +# and yet another guess: small .text, and after large .data is unusual, could be 32lite +>>&0x2c search/0xa0 .text +>>>&0x0b lelong <0x2000 +>>>>&0 lelong >0x6000 \b, 32lite compressed + +>(8.s*16) string $WdX \b, WDos/X DOS extender + +# By now an executable type should have been printed out. The executable +# may be a self-uncompressing archive, so look for evidence of that and +# print it out. +# +# Some signatures below from Greg Roelofs, newt@uchicago.edu.
+# +>0x35 string \x8e\xc0\xb9\x08\x00\xf3\xa5\x4a\x75\xeb\x8e\xc3\x8e\xd8\x33\xff\xbe\x30\x00\x05 \b, aPack compressed +>0xe7 string LH/2\ Self-Extract \b, %s +>0x1c string UC2X \b, UCEXE compressed +>0x1c string WWP\ \b, WWPACK compressed +>0x1c string RJSX \b, ARJ self-extracting archive +>0x1c string diet \b, diet compressed +>0x1c string LZ09 \b, LZEXE v0.90 compressed +>0x1c string LZ91 \b, LZEXE v0.91 compressed +>0x1c string tz \b, TinyProg compressed +>0x1e string Copyright\ 1989-1990\ PKWARE\ Inc. Self-extracting PKZIP archive +!:mime application/zip +# Yes, this really is "Copr", not "Corp." +>0x1e string PKLITE\ Copr. Self-extracting PKZIP archive +!:mime application/zip +# winarj stores a message in the stub instead of the sig in the MZ header +>0x20 search/0xe0 aRJsfX \b, ARJ self-extracting archive +>0x20 string AIN +>>0x23 string 2 \b, AIN 2.x compressed +>>0x23 string <2 \b, AIN 1.x compressed +>>0x23 string >2 \b, AIN 1.x compressed +>0x24 string LHa's\ SFX \b, LHa self-extracting archive +!:mime application/x-lha +>0x24 string LHA's\ SFX \b, LHa self-extracting archive +!:mime application/x-lha +>0x24 string \ $ARX \b, ARX self-extracting archive +>0x24 string \ $LHarc \b, LHarc self-extracting archive +>0x20 string SFX\ by\ LARC \b, LARC self-extracting archive +>0x40 string aPKG \b, aPackage self-extracting archive +>0x64 string W\ Collis\0\0 \b, Compack compressed +>0x7a string Windows\ self-extracting\ ZIP \b, ZIP self-extracting archive +>>&0xf4 search/0x140 \x0\x40\x1\x0 +>>>(&0.l+(4)) string MSCF \b, WinHKI CAB self-extracting archive +>1638 string -lh5- \b, LHa self-extracting archive v2.13S +>0x17888 string Rar! \b, RAR self-extracting archive + +# Skip to the end of the EXE. This will usually work fine in the PE case +# because the MZ image is hardcoded into the toolchain and almost certainly +# won't match any of these signatures. 
+>(4.s*512) long x +>>&(2.s-517) byte x +>>>&0 string PK\3\4 \b, ZIP self-extracting archive +>>>&0 string Rar! \b, RAR self-extracting archive +>>>&0 string =!\x11 \b, AIN 2.x self-extracting archive +>>>&0 string =!\x12 \b, AIN 2.x self-extracting archive +>>>&0 string =!\x17 \b, AIN 1.x self-extracting archive +>>>&0 string =!\x18 \b, AIN 1.x self-extracting archive +>>>&7 search/400 **ACE** \b, ACE self-extracting archive +>>>&0 search/0x480 UC2SFX\ Header \b, UC2 self-extracting archive + +# a few unknown ZIP sfxes, no idea if they are needed or if they are +# already captured by the generic patterns above +>(8.s*16) search/0x20 PKSFX \b, ZIP self-extracting archive (PKZIP) +# TODO: how to add this? >FileSize-34 string Windows\ Self-Installing\ Executable \b, ZIP self-extracting archive +# + +# TELVOX Teleinformatica CODEC self-extractor for OS/2: +>49801 string \x79\xff\x80\xff\x76\xff \b, CODEC archive v3.21 +>>49824 leshort =1 \b, 1 file +>>49824 leshort >1 \b, %u files + +# Popular applications +2080 string Microsoft\ Word\ 6.0\ Document %s +!:mime application/msword +2080 string Documento\ Microsoft\ Word\ 6 Spanish Microsoft Word 6 document data +!:mime application/msword +# Pawel Wiecek (for polish Word) +2112 string MSWordDoc Microsoft Word document data +!:mime application/msword +# +0 belong 0x31be0000 Microsoft Word Document +!:mime application/msword +# +0 string/b PO^Q` Microsoft Word 6.0 Document +!:mime application/msword +# +0 string/b \376\067\0\043 Microsoft Office Document +!:mime application/msword +0 string/b \333\245-\0\0\0 Microsoft Office Document +!:mime application/msword +512 string/b \354\245\301 Microsoft Word Document +!:mime application/msword + +# +0 string/b \xDB\xA5\x2D\x00 Microsoft WinWord 2.0 Document +!:mime application/msword +# +2080 string Microsoft\ Excel\ 5.0\ Worksheet %s +!:mime application/vnd.ms-excel +# +0 string/b \xDB\xA5\x2D\x00 Microsoft WinWord 2.0 Document +!:mime application/msword + +2080 string Foglio\ 
di\ lavoro\ Microsoft\ Exce %s +!:mime application/vnd.ms-excel +# +# Pawel Wiecek (for polish Excel) +2114 string Biff5 Microsoft Excel 5.0 Worksheet +!:mime application/vnd.ms-excel +# Italian MS-Excel +2121 string Biff5 Microsoft Excel 5.0 Worksheet +!:mime application/vnd.ms-excel +0 string/b \x09\x04\x06\x00\x00\x00\x10\x00 Microsoft Excel Worksheet +!:mime application/vnd.ms-excel +# +0 belong 0x00001a00 Lotus 1-2-3 +!:mime application/x-123 +# +0 belong 0x00000200 Lotus 1-2-3 +!:mime application/x-123 +0 string/b WordPro\0 Lotus WordPro +!:mime application/vnd.lotus-wordpro +0 string/b WordPro\r\373 Lotus WordPro +!:mime application/vnd.lotus-wordpro + +# Windows icons (Ian Springer ) +0 string/b \000\000\001\000 MS Windows icon resource +!:mime image/x-icon + +# .PIF files added by Joerg Jenderek from http://smsoft.ru/en/pifdoc.htm +# only for windows versions equal or greater 3.0 +0x171 string MICROSOFT\ PIFEX\0 Windows Program Information File +!:mime application/x-dosexec + +# TNEF magic From "Joomy" +# Microsoft Outlook's Transport Neutral Encapsulation Format (TNEF) +0 leshort 0x223e9f78 TNEF +!:mime application/vnd.ms-tnef + +#------------------------------------------------------------------------------ +# From Stuart Caie (developer of cabextract) +# Microsoft Cabinet files +0 string/b MSCF\0\0\0\0 Microsoft Cabinet archive data +!:mime application/vnd.ms-cab-compressed + +# from http://filext.com by Derek M Jones +# False positive with PPT (also currently this string is too long) +#0 string/b \xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x3E\x00\x03\x00\xFE\xFF\x09\x00\x06 Microsoft Installer +0 string/b \320\317\021\340\241\261\032\341 Microsoft Office Document +#>48 byte 0x1B Excel Document +#!:mime application/vnd.ms-excel +>546 string bjbj Microsoft Word Document +!:mime application/msword +>546 string jbjb Microsoft Word Document +!:mime application/msword + +0 string/b \224\246\056 Microsoft 
Word Document +!:mime application/msword + +512 string R\0o\0o\0t\0\ \0E\0n\0t\0r\0y Microsoft Word Document +!:mime application/msword + +# MS eBook format (.lit) +0 string/b ITOLITLS Microsoft Reader eBook Data +>8 lelong x \b, version %u +!:mime application/x-ms-reader diff --git a/magic/neko b/magic/neko new file mode 100644 index 0000000000..50163a0861 --- /dev/null +++ b/magic/neko @@ -0,0 +1,12 @@ +# See COPYING file in this directory for original libmagic copyright. +#------------------------------------------------------------ +# $File: java,v 1.12 2009/09/19 16:28:10 christos Exp $ + +# From: Mikhail Gusarov +# NekoVM (http://nekovm.org/) bytecode +0 string NEKO NekoVM bytecode +>4 lelong x (%d global symbols, +>8 lelong x %d global fields, +>12 lelong x %d bytecode ops) +!:mime application/x-nekovm-bytecode + diff --git a/magic/pascal b/magic/pascal new file mode 100644 index 0000000000..911eea3c0c --- /dev/null +++ b/magic/pascal @@ -0,0 +1,11 @@ +# See COPYING file in this directory for original libmagic copyright. +#------------------------------------------------------------------------------ +# $File$ +# pascal: file(1) magic for Pascal source +# +0 search/8192 (input, Pascal source text +!:mime text/x-pascal +0 regex \^program Pascal source text +!:mime text/x-pascal +0 regex \^record Pascal source text +!:mime text/x-pascal diff --git a/magic/pdf b/magic/pdf new file mode 100644 index 0000000000..761006ffe6 --- /dev/null +++ b/magic/pdf @@ -0,0 +1,8 @@ +# See COPYING file in this directory for original libmagic copyright. +#------------------------------------------------------------------------------ +# $File$ +# pdf: file(1) magic for Portable Document Format +# + +0 string %PDF- PDF document +!:mime application/pdf diff --git a/magic/perl b/magic/perl new file mode 100644 index 0000000000..12ec33b73a --- /dev/null +++ b/magic/perl @@ -0,0 +1,26 @@ +# See COPYING file in this directory for original libmagic copyright. 
+#------------------------------------------------------------------------------ +# $File: perl,v 1.19 2012/06/20 21:16:25 christos Exp $ +# perl: file(1) magic for Larry Wall's perl language. +# +# The `eval' lines recognizes an outrageously clever hack. +# Keith Waclena +# Send additions to +0 search/1/w #!\ /bin/perl Perl script text executable +!:mime text/x-perl +0 search/1 eval\ "exec\ /bin/perl Perl script text +!:mime text/x-perl +0 search/1/w #!\ /usr/bin/perl Perl script text executable +!:mime text/x-perl +0 search/1 eval\ "exec\ /usr/bin/perl Perl script text +!:mime text/x-perl +0 search/1/w #!\ /usr/local/bin/perl Perl script text executable +!:mime text/x-perl +0 search/1 eval\ "exec\ /usr/local/bin/perl Perl script text +!:mime text/x-perl +0 search/1 eval\ '(exit\ $?0)'\ &&\ eval\ 'exec Perl script text +!:mime text/x-perl +0 search/1 #!/usr/bin/env\ perl Perl script text executable +!:mime text/x-perl +0 search/1 #!\ /usr/bin/env\ perl Perl script text executable +!:mime text/x-perl diff --git a/magic/pgp b/magic/pgp new file mode 100644 index 0000000000..2bdfb77981 --- /dev/null +++ b/magic/pgp @@ -0,0 +1,27 @@ +# See COPYING file in this directory for original libmagic copyright. 
+#------------------------------------------------------------------------------ +# $File$ +# pgp: file(1) magic for Pretty Good Privacy +# see http://lists.gnupg.org/pipermail/gnupg-devel/1999-September/016052.html +# +0 beshort 0x9900 PGP key public ring +!:mime application/x-pgp-keyring +0 beshort 0x9501 PGP key security ring +!:mime application/x-pgp-keyring +0 beshort 0x9500 PGP key security ring +!:mime application/x-pgp-keyring +0 beshort 0xa600 PGP encrypted data +#!:mime application/pgp-encrypted +#0 string -----BEGIN\040PGP text/PGP armored data +!:mime text/PGP # encoding: armored data +#>15 string PUBLIC\040KEY\040BLOCK- public key block +#>15 string MESSAGE- message +#>15 string SIGNED\040MESSAGE- signed message +#>15 string PGP\040SIGNATURE- signature + +2 string ---BEGIN\ PGP\ PUBLIC\ KEY\ BLOCK- PGP public key block +!:mime application/pgp-keys +0 string -----BEGIN\040PGP\40MESSAGE- PGP message +!:mime application/pgp +0 string -----BEGIN\040PGP\40SIGNATURE- PGP signature +!:mime application/pgp-signature diff --git a/magic/pkgadd b/magic/pkgadd new file mode 100644 index 0000000000..602b4ec21d --- /dev/null +++ b/magic/pkgadd @@ -0,0 +1,7 @@ +# See COPYING file in this directory for original libmagic copyright. +#------------------------------------------------------------------------------ +# $File$ +# pkgadd: file(1) magic for SysV R4 PKG Datastreams +# +0 string #\ PaCkAgE\ DaTaStReAm pkg Datastream (SVR4) +!:mime application/x-svr4-package diff --git a/magic/printer b/magic/printer new file mode 100644 index 0000000000..cdce275b12 --- /dev/null +++ b/magic/printer @@ -0,0 +1,14 @@ +# See COPYING file in this directory for original libmagic copyright. +#------------------------------------------------------------------------------ +# $File: printer,v 1.24 2011/05/08 16:34:51 christos Exp $ +# printer: file(1) magic for printer-formatted files +# + +# PostScript, updated by Daniel Quinlan (quinlan@yggdrasil.com) +0 string %! 
PostScript document text +!:mime application/postscript +!:apple ASPSTEXT +# Some PCs have the annoying habit of adding a ^D as a document separator +0 string \004%! PostScript document text +!:mime application/postscript +!:apple ASPSTEXT diff --git a/magic/python b/magic/python new file mode 100644 index 0000000000..1cd724bc59 --- /dev/null +++ b/magic/python @@ -0,0 +1,46 @@ +# See COPYING file in this directory for original libmagic copyright. +#------------------------------------------------------------------------------ +# $File: python,v 1.21 2012/06/21 01:12:51 christos Exp $ +# python: file(1) magic for python +# + +0 search/1/w #!\ /usr/bin/python Python script text executable +!:mime text/x-python +0 search/1/w #!\ /usr/local/bin/python Python script text executable +!:mime text/x-python +0 search/1 #!/usr/bin/env\ python Python script text executable +!:mime text/x-python +0 search/1 #!\ /usr/bin/env\ python Python script text executable +!:mime text/x-python + +# from module.submodule import func1, func2 +0 regex \^from\\s+(\\w|\\.)+\\s+import.*$ Python script text executable +!:mime text/x-python + +# def __init__ (self, ...): +0 search/4096 def\ __init__ +>&0 search/64 self Python script text executable +!:mime text/x-python + +# comments +0 search/4096 ''' +>&0 regex .*'''$ Python script text executable +!:mime text/x-python + +0 search/4096 """ +>&0 regex .*"""$ Python script text executable +!:mime text/x-python + +# try: +# except: or finally: +# block +0 search/4096 try: +>&0 regex \^\\s*except.*: Python script text executable +!:mime text/x-python +>&0 search/4096 finally: Python script text executable +!:mime text/x-python + +# def name(args, args): +0 regex \^(\ |\\t)*def\ +[a-zA-Z]+ +>&0 regex \ *\\(([a-zA-Z]|,|\ )*\\):$ Python script text executable +!:mime text/x-python diff --git a/magic/riff b/magic/riff new file mode 100644 index 0000000000..929dc9aa89 --- /dev/null +++ b/magic/riff @@ -0,0 +1,36 @@ +# See COPYING file in this 
directory for original libmagic copyright. +#------------------------------------------------------------------------------ +# $File: riff,v 1.22 2011/09/06 11:00:06 christos Exp $ +# riff: file(1) magic for RIFF format +# See +# +# http://www.seanet.com/users/matts/riffmci/riffmci.htm +# +# AVI section extended by Patrik Radman +# +0 string RIFF RIFF (little-endian) data +# Microsoft WAVE format (*.wav) +>8 string WAVE \b, WAVE audio +!:mime audio/x-wav +# Corel Draw Picture +>8 string CDRA \b, Corel Draw Picture +!:mime image/x-coreldraw +# AVI == Audio Video Interleave +>8 string AVI\040 \b, AVI +!:mime video/x-msvideo + +#------------------------------------------------------------------------------ +# Sony Wave64 +# see http://www.vcs.de/fileadmin/user_upload/MBS/PDF/Whitepaper/Informations_about_Sony_Wave64.pdf +# 128 bit RIFF-GUID { 66666972-912E-11CF-A5D6-28DB04C10000 } in little-endian +0 string riff\x2E\x91\xCF\x11\xA5\xD6\x28\xDB\x04\xC1\x00\x00 Sony Wave64 RIFF data +# 128 bit + total file size (64 bits) so 24 bytes +# then WAVE-GUID { 65766177-ACF3-11D3-8CD1-00C04F8EDB8A } +>24 string wave\xF3\xAC\xD3\x11\x8C\xD1\x00\xC0\x4F\x8E\xDB\x8A \b, WAVE 64 audio +!:mime audio/x-w64 + +#------------------------------------------------------------------------------ +# MBWF/RF64 +# see EBU TECH 3306 http://tech.ebu.ch/docs/tech/tech3306-2009.pdf +0 string RF64\xff\xff\xff\xffWAVEds64 MBWF/RF64 audio +!:mime audio/x-wav diff --git a/magic/rpm b/magic/rpm new file mode 100644 index 0000000000..2558ebeef1 --- /dev/null +++ b/magic/rpm @@ -0,0 +1,12 @@ +# See COPYING file in this directory for original libmagic copyright. 
+#------------------------------------------------------------------------------ +# $File: rpm,v 1.11 2011/06/14 12:47:41 christos Exp $ +# +# RPM: file(1) magic for Red Hat Packages Erik Troan (ewt@redhat.com) +# +0 belong 0xedabeedb RPM +!:mime application/x-rpm + +#delta RPM Daniel Novotny (dnovotny@redhat.com) +0 string drpm Delta RPM +!:mime application/x-rpm diff --git a/magic/rtf b/magic/rtf new file mode 100644 index 0000000000..0719264e47 --- /dev/null +++ b/magic/rtf @@ -0,0 +1,9 @@ +# See COPYING file in this directory for original libmagic copyright. +#------------------------------------------------------------------------------ +# $File$ +# rtf: file(1) magic for Rich Text Format (RTF) +# +# Duncan P. Simpson, D.P.Simpson@dcs.warwick.ac.uk +# +0 string {\\rtf Rich Text Format data, +!:mime text/rtf diff --git a/magic/ruby b/magic/ruby new file mode 100644 index 0000000000..41682a89ad --- /dev/null +++ b/magic/ruby @@ -0,0 +1,28 @@ +# See COPYING file in this directory for original libmagic copyright. 
+#------------------------------------------------------------------------------ +# $File: ruby,v 1.4 2010/07/08 20:24:13 christos Exp $ +# ruby: file(1) magic for Ruby scripting language +# URL: http://www.ruby-lang.org/ +# From: Reuben Thomas + +# Ruby scripts +0 search/1/w #!\ /usr/bin/ruby Ruby script text executable +!:mime text/x-ruby +0 search/1/w #!\ /usr/local/bin/ruby Ruby script text executable +!:mime text/x-ruby +0 search/1 #!/usr/bin/env\ ruby Ruby script text executable +!:mime text/x-ruby +0 search/1 #!\ /usr/bin/env\ ruby Ruby script text executable +!:mime text/x-ruby + +# What looks like ruby, but does not have a shebang +# (modules and such) +# From: Lubomir Rintel +0 regex \^[\ \t]*require[\ \t]'[A-Za-z_/]+' +>0 regex include\ [A-Z]|def\ [a-z]|\ do$ +>>0 regex \^[\ \t]*end([\ \t]*[;#].*)?$ Ruby script text +!:mime text/x-ruby +0 regex \^[\ \t]*(class|module)[\ \t][A-Z] +>0 regex (modul|includ)e\ [A-Z]|def\ [a-z] +>>0 regex \^[\ \t]*end([\ \t]*[;#].*)?$ Ruby module source text +!:mime text/x-ruby diff --git a/magic/sc b/magic/sc new file mode 100644 index 0000000000..75333b3916 --- /dev/null +++ b/magic/sc @@ -0,0 +1,7 @@ +# See COPYING file in this directory for original libmagic copyright. +#------------------------------------------------------------------------------ +# $File$ +# sc: file(1) magic for "sc" spreadsheet +# +38 string Spreadsheet sc spreadsheet file +!:mime application/x-sc diff --git a/magic/sgml b/magic/sgml new file mode 100644 index 0000000000..64efa2c153 --- /dev/null +++ b/magic/sgml @@ -0,0 +1,82 @@ +# See COPYING file in this directory for original libmagic copyright. 
+#------------------------------------------------------------------------------ +# $File: sgml,v 1.28 2012/04/28 21:20:26 christos Exp $ +# Type: SVG Vectorial Graphics +# From: Noel Torres +0 string \15 string >\0 +>>19 search/4096 \>19 search/4096 \15 string >\0 +>>19 search/4096/cWbt \15 string >\0 +>>19 search/4096/cWbt \15 string >\0 +>>19 search/4096/cWbt \ + +# Although we may know the offset of certain text fields in TeX DVI +# and font files, we can't use them reliably because they are not +# zero terminated. [but we do anyway, christos] +0 string \367\002 TeX DVI file +!:mime application/x-dvi + +# There is no way to detect TeX Font Metric (*.tfm) files without +# breaking them apart and reading the data. The following patterns +# match most *.tfm files generated by METAFONT or afm2tfm. +2 string \000\021 TeX font metric data +!:mime application/x-tex-tfm +2 string \000\022 TeX font metric data +!:mime application/x-tex-tfm + +# Texinfo and GNU Info, from Daniel Quinlan (quinlan@yggdrasil.com) +0 search/1 \\input\ texinfo Texinfo source text +!:mime text/x-texinfo +0 search/1 This\ is\ Info\ file GNU Info text +!:mime text/x-info + +# TeX documents, from Daniel Quinlan (quinlan@yggdrasil.com) +0 search/4096 \\input TeX document text +!:mime text/x-tex +!:strength + 15 +0 search/4096 \\section LaTeX document text +!:mime text/x-tex +!:strength + 18 +0 search/4096 \\setlength LaTeX document text +!:mime text/x-tex +!:strength + 15 +0 search/4096 \\documentstyle LaTeX document text +!:mime text/x-tex +!:strength + 18 +0 search/4096 \\chapter LaTeX document text +!:mime text/x-tex +!:strength + 18 +0 search/4096 \\documentclass LaTeX 2e document text +!:mime text/x-tex +!:strength + 15 +0 search/4096 \\relax LaTeX auxiliary file +!:mime text/x-tex +!:strength + 15 +0 search/4096 \\contentsline LaTeX table of contents +!:mime text/x-tex +!:strength + 15 +0 search/4096 %\ -*-latex-*- LaTeX document text +!:mime text/x-tex diff --git a/magic/troff b/magic/troff 
new file mode 100644 index 0000000000..7f60b1d9b3 --- /dev/null +++ b/magic/troff @@ -0,0 +1,22 @@ +# See COPYING file in this directory for original libmagic copyright. +#------------------------------------------------------------------------------ +# $File$ +# troff: file(1) magic for *roff +# +# updated by Daniel Quinlan (quinlan@yggdrasil.com) + +# troff input +0 search/1 .\\" troff or preprocessor input text +!:mime text/troff +0 search/1 '\\" troff or preprocessor input text +!:mime text/troff +0 search/1 '.\\" troff or preprocessor input text +!:mime text/troff +0 search/1 \\" troff or preprocessor input text +!:mime text/troff +0 search/1 ''' troff or preprocessor input text +!:mime text/troff +0 regex/20 \^\\.[A-Za-z0-9][A-Za-z0-9][\ \t] troff or preprocessor input text +!:mime text/troff +0 regex/20 \^\\.[A-Za-z0-9][A-Za-z0-9]$ troff or preprocessor input text +!:mime text/troff diff --git a/magic/vorbis b/magic/vorbis new file mode 100644 index 0000000000..4d25c3c3cd --- /dev/null +++ b/magic/vorbis @@ -0,0 +1,26 @@ +# See COPYING file in this directory for original libmagic copyright. +#------------------------------------------------------------------------------ +# $File$ +# vorbis: file(1) magic for Ogg/Vorbis files +# +# From Felix von Leitner +# Extended by Beni Cherniavsky +# Further extended by Greg Wooledge +# +# Most (everything but the number of channels and bitrate) is commented +# out with `##' as it's not interesting to the average user. The most +# probable things advanced users would want to uncomment are probably +# the number of comments and the encoder version. +# +# FIXME: The first match has been made a search, so that it can skip +# over prepended ID3 tags. This will work for MIME type detection, but +# won't work for detecting other properties of the file (they all need +# to be made relative to the search). 
In any case, if the file has ID3 +# tags, the ID3 information will be printed, not the Ogg information, +# so until that's fixed, this doesn't matter. +# FIXME[2]: Disable the above for now, since search assumes text mode. +# +# --- Ogg Framing --- +#0 search/1000 OggS Ogg data +0 string OggS Ogg data +!:mime application/ogg diff --git a/magic/warc b/magic/warc new file mode 100644 index 0000000000..2a2aeb6fae --- /dev/null +++ b/magic/warc @@ -0,0 +1,14 @@ +# See COPYING file in this directory for original libmagic copyright. +#------------------------------------------------------------------------------ +# $File: warc,v 1.2 2009/09/19 16:28:13 christos Exp $ +# warc: file(1) magic for WARC files + +0 string WARC/ WARC Archive +>5 string x version %.4s +!:mime application/warc + +#------------------------------------------------------------------------------ +# Arc File Format from Internet Archive +# see http://www.archive.org/web/researcher/ArcFileFormat.php +0 string filedesc:// Internet Archive File +!:mime application/x-ia-arc diff --git a/magic/windows b/magic/windows new file mode 100644 index 0000000000..6a529782a9 --- /dev/null +++ b/magic/windows @@ -0,0 +1,19 @@ +# See COPYING file in this directory for original libmagic copyright. +#------------------------------------------------------------------------------ +# $File: windows,v 1.4 2009/09/19 16:28:13 christos Exp $ +# windows: file(1) magic for Microsoft Windows +# +# This file is mainly reserved for files where programs +# using them are run almost always on MS Windows 3.x or +# above, or files only used exclusively in Windows OS, +# where there is no better category to allocate for. +# For example, even though WinZIP almost run on Windows +# only, it is better to treat them as "archive" instead. +# For format usable in DOS, such as generic executable +# format, please specify under "msdos" file. +# + +# From: Pal Tamas +# Autorun File +0 string/c [autorun]\r\n Microsoft Windows Autorun file. 
+!:mime application/x-setupscript. diff --git a/magic/wordprocessors b/magic/wordprocessors new file mode 100644 index 0000000000..7de3413c0a --- /dev/null +++ b/magic/wordprocessors @@ -0,0 +1,43 @@ +# See COPYING file in this directory for original libmagic copyright. +#------------------------------------------------------------------------------ +# $File: wordprocessors,v 1.16 2012/10/29 17:36:49 christos Exp $ +# wordprocessors: file(1) magic for word processors. +# + +# Hangul (Korean) Word Processor File +# From: Won-Kyu Park +512 string R\0o\0o\0t\0 Hangul (Korean) Word Processor File 2000 +!:mime application/x-hwp + +# Quark Express from http://www.garykessler.net/library/file_sigs.html +2 string MMXPR3 Motorola Quark Express Document (English) +!:mime application/x-quark-xpress-3 + +#------------------------------------------------------------------------------ +# ichitaro456: file(1) magic for Just System Word Processor Ichitaro +# +# Contributor kenzo-: +# Reverse-engineered JS Ichitaro magic numbers +# + +0 string DOC +>43 byte 0x14 Just System Word Processor Ichitaro v4 +!:mime application/x-ichitaro4 + +0 string DOC +>43 byte 0x15 Just System Word Processor Ichitaro v5 +!:mime application/x-ichitaro5 + +0 string DOC +>43 byte 0x16 Just System Word Processor Ichitaro v6 +!:mime application/x-ichitaro6 + +# Type: Freemind mindmap documents +# From: Jamie Thompson +0 string/w \ +0 string \Error("can't init libmagic: %s", err ? err : "unknown"); + if ( ! err ) err = "unknown"; + reporter->InternalError("can't init libmagic: %s", err); } - else if ( magic_load(*cookie_ptr, 0) < 0 ) + else if ( magic_load(*cookie_ptr, database) < 0 ) { const char* err = magic_error(*cookie_ptr); - reporter->Error("can't load magic file: %s", err ? err : "unknown"); + if ( !
err ) err = "unknown"; + reporter->InternalError("can't load magic file: %s", err); magic_close(*cookie_ptr); *cookie_ptr = 0; } diff --git a/src/util.h b/src/util.h index 716fead054..4e352457b7 100644 --- a/src/util.h +++ b/src/util.h @@ -188,6 +188,7 @@ extern void pinpoint(); extern int int_list_cmp(const void* v1, const void* v2); extern const char* bro_path(); +extern const char* bro_magic_path(); extern const char* bro_prefixes(); std::string dot_canon(std::string path, std::string file, std::string prefix = ""); const char* normalize_path(const char* path); diff --git a/testing/btest/Baseline/core.tunnels.ayiya/http.log b/testing/btest/Baseline/core.tunnels.ayiya/http.log index 902d0e0fb9..cab51f8224 100644 --- a/testing/btest/Baseline/core.tunnels.ayiya/http.log +++ b/testing/btest/Baseline/core.tunnels.ayiya/http.log @@ -6,7 +6,7 @@ #open 2013-03-22-14-38-11 #fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied mime_type md5 extraction_file #types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] string string string -1257655301.652206 5OKnoww6xl4 2001:4978:f:4c::2 53382 2001:4860:b002::68 80 1 GET ipv6.google.com / - Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en; rv:1.9.0.15pre) Gecko/2009091516 Camino/2.0b4 (like Firefox/3.0.15pre) 0 10102 200 OK - - - (empty) - - - text/html; charset=us-ascii - - +1257655301.652206 5OKnoww6xl4 2001:4978:f:4c::2 53382 2001:4860:b002::68 80 1 GET ipv6.google.com / - Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en; rv:1.9.0.15pre) Gecko/2009091516 Camino/2.0b4 (like Firefox/3.0.15pre) 0 10102 200 OK - - - (empty) - - - text/html - - 1257655302.514424 5OKnoww6xl4 2001:4978:f:4c::2 53382 2001:4860:b002::68 80 2 GET ipv6.google.com 
/csi?v=3&s=webhp&action=&tran=undefined&e=17259,19771,21517,21766,21887,22212&ei=BUz2Su7PMJTglQfz3NzCAw&rt=prt.77,xjs.565,ol.645 http://ipv6.google.com/ Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en; rv:1.9.0.15pre) Gecko/2009091516 Camino/2.0b4 (like Firefox/3.0.15pre) 0 0 204 No Content - - - (empty) - - - - - - 1257655303.603569 5OKnoww6xl4 2001:4978:f:4c::2 53382 2001:4860:b002::68 80 3 GET ipv6.google.com /gen_204?atyp=i&ct=fade&cad=1254&ei=BUz2Su7PMJTglQfz3NzCAw&zx=1257655303600 http://ipv6.google.com/ Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en; rv:1.9.0.15pre) Gecko/2009091516 Camino/2.0b4 (like Firefox/3.0.15pre) 0 0 204 No Content - - - (empty) - - - - - - #close 2013-03-22-14-38-11 diff --git a/testing/btest/Baseline/core.tunnels.gtp.different_dl_and_ul/http.log b/testing/btest/Baseline/core.tunnels.gtp.different_dl_and_ul/http.log index 02c5cf6e63..51f3b28791 100644 --- a/testing/btest/Baseline/core.tunnels.gtp.different_dl_and_ul/http.log +++ b/testing/btest/Baseline/core.tunnels.gtp.different_dl_and_ul/http.log @@ -6,6 +6,6 @@ #open 2013-03-22-14-37-45 #fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied mime_type md5 extraction_file #types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] string string string -1333458850.340368 arKYeMETxOg 10.131.17.170 51803 173.199.115.168 80 1 GET cdn.epicgameads.com /ads/flash/728x90_nx8com.swf?clickTAG=http://www.epicgameads.com/ads/bannerclickPage.php?id=e3ubwU6IF&pd=1&adid=0&icpc=1&axid=0&uctt=1&channel=4&cac=1&t=728x90&cb=1333458879 http://www.epicgameads.com/ads/banneriframe.php?id=e3ubwU6IF&t=728x90&channel=4&cb=1333458905296 Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0) 0 31461 200 OK - - - (empty) - - - 
application/x-shockwave-flash; charset=binary - - -1333458850.399501 arKYeMETxOg 10.131.17.170 51803 173.199.115.168 80 2 GET cdn.epicgameads.com /ads/flash/728x90_nx8com.swf?clickTAG=http://www.epicgameads.com/ads/bannerclickPage.php?id=e3ubwU6IF&pd=1&adid=0&icpc=1&axid=0&uctt=1&channel=0&cac=1&t=728x90&cb=1333458881 http://www.epicgameads.com/ads/banneriframe.php?id=e3ubwU6IF&t=728x90&cb=1333458920207 Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0) 0 31461 200 OK - - - (empty) - - - application/x-shockwave-flash; charset=binary - - +1333458850.340368 arKYeMETxOg 10.131.17.170 51803 173.199.115.168 80 1 GET cdn.epicgameads.com /ads/flash/728x90_nx8com.swf?clickTAG=http://www.epicgameads.com/ads/bannerclickPage.php?id=e3ubwU6IF&pd=1&adid=0&icpc=1&axid=0&uctt=1&channel=4&cac=1&t=728x90&cb=1333458879 http://www.epicgameads.com/ads/banneriframe.php?id=e3ubwU6IF&t=728x90&channel=4&cb=1333458905296 Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0) 0 31461 200 OK - - - (empty) - - - application/x-shockwave-flash - - +1333458850.399501 arKYeMETxOg 10.131.17.170 51803 173.199.115.168 80 2 GET cdn.epicgameads.com /ads/flash/728x90_nx8com.swf?clickTAG=http://www.epicgameads.com/ads/bannerclickPage.php?id=e3ubwU6IF&pd=1&adid=0&icpc=1&axid=0&uctt=1&channel=0&cac=1&t=728x90&cb=1333458881 http://www.epicgameads.com/ads/banneriframe.php?id=e3ubwU6IF&t=728x90&cb=1333458920207 Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0) 0 31461 200 OK - - - (empty) - - - application/x-shockwave-flash - - #close 2013-03-22-14-37-45 diff --git a/testing/btest/Baseline/core.tunnels.gtp.outer_ip_frag/http.log b/testing/btest/Baseline/core.tunnels.gtp.outer_ip_frag/http.log index 123fe9fb3d..5067915aff 100644 --- a/testing/btest/Baseline/core.tunnels.gtp.outer_ip_frag/http.log +++ b/testing/btest/Baseline/core.tunnels.gtp.outer_ip_frag/http.log @@ -6,5 +6,5 @@ #open 2013-03-28-21-35-15 #fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth 
method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied mime_type md5 extraction_file #types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] string string string -1333458850.375568 arKYeMETxOg 10.131.47.185 1923 79.101.110.141 80 1 GET o-o.preferred.telekomrs-beg1.v2.lscache8.c.youtube.com /videoplayback?upn=MTU2MDY5NzQ5OTM0NTI3NDY4NDc&sparams=algorithm,burst,cp,factor,id,ip,ipbits,itag,source,upn,expire&fexp=912300,907210&algorithm=throttle-factor&itag=34&ip=212.0.0.0&burst=40&sver=3&signature=832FB1042E20780CFCA77A4DB5EA64AC593E8627.D1166C7E8365732E52DAFD68076DAE0146E0AE01&source=youtube&expire=1333484980&key=yt1&ipbits=8&factor=1.25&cp=U0hSSFRTUl9NSkNOMl9MTVZKOjh5eEN2SG8tZF84&id=ebf1e932d4bd1286&cm2=1 http://s.ytimg.com/yt/swfbin/watch_as3-vflqrJwOA.swf Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko; X-SBLSP) Chrome/17.0.963.83 Safari/535.11 0 56320 206 Partial Content - - - (empty) - - - application/octet-stream; charset=binary - - +1333458850.375568 arKYeMETxOg 10.131.47.185 1923 79.101.110.141 80 1 GET o-o.preferred.telekomrs-beg1.v2.lscache8.c.youtube.com /videoplayback?upn=MTU2MDY5NzQ5OTM0NTI3NDY4NDc&sparams=algorithm,burst,cp,factor,id,ip,ipbits,itag,source,upn,expire&fexp=912300,907210&algorithm=throttle-factor&itag=34&ip=212.0.0.0&burst=40&sver=3&signature=832FB1042E20780CFCA77A4DB5EA64AC593E8627.D1166C7E8365732E52DAFD68076DAE0146E0AE01&source=youtube&expire=1333484980&key=yt1&ipbits=8&factor=1.25&cp=U0hSSFRTUl9NSkNOMl9MTVZKOjh5eEN2SG8tZF84&id=ebf1e932d4bd1286&cm2=1 http://s.ytimg.com/yt/swfbin/watch_as3-vflqrJwOA.swf Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko; X-SBLSP) Chrome/17.0.963.83 Safari/535.11 0 56320 206 Partial Content - - - (empty) - - - application/octet-stream - - #close 2013-03-28-21-35-15 
diff --git a/testing/btest/Baseline/core.tunnels.teredo/http.log b/testing/btest/Baseline/core.tunnels.teredo/http.log index fe015a130d..f8be9be69b 100644 --- a/testing/btest/Baseline/core.tunnels.teredo/http.log +++ b/testing/btest/Baseline/core.tunnels.teredo/http.log @@ -6,8 +6,8 @@ #open 2013-03-22-14-37-44 #fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied mime_type md5 extraction_file #types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] string string string -1210953057.917183 3PKsZ2Uye21 192.168.2.16 1578 75.126.203.78 80 1 POST download913.avast.com /cgi-bin/iavs4stats.cgi - Syncer/4.80 (av_pro-1169;f) 589 0 204 - - - (empty) - - - text/plain; charset=us-ascii - - -1210953061.585996 70MGiRM1Qf4 2001:0:4137:9e50:8000:f12a:b9c8:2815 1286 2001:4860:0:2001::68 80 1 GET ipv6.google.com / - Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9b5) Gecko/2008032620 Firefox/3.0b5 0 6640 200 OK - - - (empty) - - - text/html; charset=us-ascii - - -1210953073.381474 70MGiRM1Qf4 2001:0:4137:9e50:8000:f12a:b9c8:2815 1286 2001:4860:0:2001::68 80 2 GET ipv6.google.com /search?hl=en&q=Wireshark+!&btnG=Google+Search http://ipv6.google.com/ Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9b5) Gecko/2008032620 Firefox/3.0b5 0 25119 200 OK - - - (empty) - - - text/html; charset=us-ascii - - -1210953074.674817 c4Zw9TmAE05 192.168.2.16 1580 67.228.110.120 80 1 GET www.wireshark.org / http://ipv6.google.com/search?hl=en&q=Wireshark+%21&btnG=Google+Search Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9b5) Gecko/2008032620 Firefox/3.0b5 0 11845 200 OK - - - (empty) - - - application/xml; charset=us-ascii - - +1210953057.917183 3PKsZ2Uye21 192.168.2.16 1578 75.126.203.78 80 1 POST 
download913.avast.com /cgi-bin/iavs4stats.cgi - Syncer/4.80 (av_pro-1169;f) 589 0 204 - - - (empty) - - - text/plain - - +1210953061.585996 70MGiRM1Qf4 2001:0:4137:9e50:8000:f12a:b9c8:2815 1286 2001:4860:0:2001::68 80 1 GET ipv6.google.com / - Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9b5) Gecko/2008032620 Firefox/3.0b5 0 6640 200 OK - - - (empty) - - - text/html - - +1210953073.381474 70MGiRM1Qf4 2001:0:4137:9e50:8000:f12a:b9c8:2815 1286 2001:4860:0:2001::68 80 2 GET ipv6.google.com /search?hl=en&q=Wireshark+!&btnG=Google+Search http://ipv6.google.com/ Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9b5) Gecko/2008032620 Firefox/3.0b5 0 25119 200 OK - - - (empty) - - - text/html - - +1210953074.674817 c4Zw9TmAE05 192.168.2.16 1580 67.228.110.120 80 1 GET www.wireshark.org / http://ipv6.google.com/search?hl=en&q=Wireshark+%21&btnG=Google+Search Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9b5) Gecko/2008032620 Firefox/3.0b5 0 11845 200 OK - - - (empty) - - - application/xml - - #close 2013-03-22-14-37-44 diff --git a/testing/btest/Baseline/core.tunnels.teredo_bubble_with_payload/http.log b/testing/btest/Baseline/core.tunnels.teredo_bubble_with_payload/http.log index 6d6f00a151..4ad6d6cd60 100644 --- a/testing/btest/Baseline/core.tunnels.teredo_bubble_with_payload/http.log +++ b/testing/btest/Baseline/core.tunnels.teredo_bubble_with_payload/http.log @@ -6,6 +6,6 @@ #open 2013-03-22-14-37-44 #fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied mime_type md5 extraction_file #types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] string string string -1340127577.361683 FrJExwHcSal 2001:0:4137:9e50:8000:f12a:b9c8:2815 1286 2001:4860:0:2001::68 80 1 GET ipv6.google.com / - 
Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9b5) Gecko/2008032620 Firefox/3.0b5 0 6640 200 OK - - - (empty) - - - text/html; charset=us-ascii - - -1340127577.379360 FrJExwHcSal 2001:0:4137:9e50:8000:f12a:b9c8:2815 1286 2001:4860:0:2001::68 80 2 GET ipv6.google.com /search?hl=en&q=Wireshark+!&btnG=Google+Search http://ipv6.google.com/ Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9b5) Gecko/2008032620 Firefox/3.0b5 0 25119 200 OK - - - (empty) - - - text/html; charset=us-ascii - - +1340127577.361683 FrJExwHcSal 2001:0:4137:9e50:8000:f12a:b9c8:2815 1286 2001:4860:0:2001::68 80 1 GET ipv6.google.com / - Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9b5) Gecko/2008032620 Firefox/3.0b5 0 6640 200 OK - - - (empty) - - - text/html - - +1340127577.379360 FrJExwHcSal 2001:0:4137:9e50:8000:f12a:b9c8:2815 1286 2001:4860:0:2001::68 80 2 GET ipv6.google.com /search?hl=en&q=Wireshark+!&btnG=Google+Search http://ipv6.google.com/ Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9b5) Gecko/2008032620 Firefox/3.0b5 0 25119 200 OK - - - (empty) - - - text/html - - #close 2013-03-22-14-37-44 diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.actions.data_event/out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.actions.data_event/out index c16f2bc1e1..d0816d8a94 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.actions.data_event/out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.actions.data_event/out @@ -2,8 +2,8 @@ FILE_NEW Cx92a0ym5R8, 0, 0 FILE_BOF_BUFFER ^J0.26 | 201 -FILE_TYPE -mime type is set +MIME_TYPE +text/plain file_stream, Cx92a0ym5R8, 1500, ^J0.26 | 2012-08-24 15:10:04 -0700^J^J * Fixing update-changes, which could pick the wrong control file. (Robin Sommer)^J^J * Fixing GPG signing script. 
(Robin Sommer)^J^J0.25 | 2012-08-01 13:55:46 -0500^J^J * Fix configure script to exit with non-zero status on error (Jon Siwek)^J^J0.24 | 2012-07-05 12:50:43 -0700^J^J * Raise minimum required CMake version to 2.6.3 (Jon Siwek)^J^J * Adding script to delete old fully-merged branches. (Robin Sommer)^J^J0.23-2 | 2012-01-25 13:24:01 -0800^J^J * Fix a bro-cut error message. (Daniel Thayer)^J^J0.23 | 2012-01-11 12:16:11 -0800^J^J * Tweaks to release scripts, plus a new one for signing files.^J (Robin Sommer)^J^J0.22 | 2012-01-10 16:45:19 -0800^J^J * Tweaks for OpenBSD support. (Jon Siwek)^J^J * bro-cut extensions and fixes. (Robin Sommer)^J ^J - If no field names are given on the command line, we now pass through^J all fields. Adresses #657.^J^J - Removing some GNUism from awk script. Addresses #653.^J^J - Added option for time output in UTC. Addresses #668.^J^J - Added output field separator option -F. Addresses #649.^J^J - Fixing option -c: only some header lines were passed through^J rather than all. (Robin Sommer)^J^J * Fix parallel make portability. (Jon Siwek)^J^J0.21-9 | 2011-11-07 05:44:14 -0800^J^J * Fixing compiler warnings. Addresses #388. (Jon Siwek)^J^J0.21-2 | 2011-11-02 18:12:13 -0700^J^J * Fix for misnaming temp file in update-changes script. (Robin Sommer)^J^J0.21-1 | 2011-11-02 18:10:39 -0700^J^J * Little fix for make-relea file_chunk, Cx92a0ym5R8, 1500, 0, ^J0.26 | 2012-08-24 15:10:04 -0700^J^J * Fixing update-changes, which could pick the wrong control file. (Robin Sommer)^J^J * Fixing GPG signing script. (Robin Sommer)^J^J0.25 | 2012-08-01 13:55:46 -0500^J^J * Fix configure script to exit with non-zero status on error (Jon Siwek)^J^J0.24 | 2012-07-05 12:50:43 -0700^J^J * Raise minimum required CMake version to 2.6.3 (Jon Siwek)^J^J * Adding script to delete old fully-merged branches. (Robin Sommer)^J^J0.23-2 | 2012-01-25 13:24:01 -0800^J^J * Fix a bro-cut error message. 
(Daniel Thayer)^J^J0.23 | 2012-01-11 12:16:11 -0800^J^J * Tweaks to release scripts, plus a new one for signing files.^J (Robin Sommer)^J^J0.22 | 2012-01-10 16:45:19 -0800^J^J * Tweaks for OpenBSD support. (Jon Siwek)^J^J * bro-cut extensions and fixes. (Robin Sommer)^J ^J - If no field names are given on the command line, we now pass through^J all fields. Adresses #657.^J^J - Removing some GNUism from awk script. Addresses #653.^J^J - Added option for time output in UTC. Addresses #668.^J^J - Added output field separator option -F. Addresses #649.^J^J - Fixing option -c: only some header lines were passed through^J rather than all. (Robin Sommer)^J^J * Fix parallel make portability. (Jon Siwek)^J^J0.21-9 | 2011-11-07 05:44:14 -0800^J^J * Fixing compiler warnings. Addresses #388. (Jon Siwek)^J^J0.21-2 | 2011-11-02 18:12:13 -0700^J^J * Fix for misnaming temp file in update-changes script. (Robin Sommer)^J^J0.21-1 | 2011-11-02 18:10:39 -0700^J^J * Little fix for make-relea file_stream, Cx92a0ym5R8, 1024, se script, which could pick out the wrong^J tag. (Robin Sommer)^J^J0.21 | 2011-10-27 17:40:45 -0700^J^J * Fixing bro-cut's usage message and argument error handling. (Robin Sommer)^J^J * Bugfix in update-changes script. (Robin Sommer)^J^J * update-changes now ignores commits it did itself. (Robin Sommer)^J^J * Fix a bug in the update-changes script. (Robin Sommer)^J^J * bro-cut now always installs to $prefix/bin by `make install`. (Jon Siwek)^J^J * Options to adjust time format for bro-cut. (Robin Sommer)^J^J The default with -d is now ISO format. The new option "-D "^J specifies a custom strftime()-style format string. Alternatively,^J the environment variable BRO_CUT_TIMEFMT can set the format as^J well.^J^J * bro-cut now understands the field separator header. (Robin Sommer)^J^J * Renaming options -h/-H -> -c/-C, and doing some general cleanup.^J^J0.2 | 2011-10-25 19:53:57 -0700^J^J * Adding support for replacing version string in a setup.py. 
(Robin^J Sommer)^J^J * Change generated root cert DN indices f diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.postpone_timeout/bro..stdout b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.postpone_timeout/bro..stdout index 67a56e0d89..9cb4b5cc68 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.postpone_timeout/bro..stdout +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.postpone_timeout/bro..stdout @@ -1,7 +1,7 @@ FILE_NEW oDwT1BbzjM1, 0, 0 -FILE_TYPE -mime type is set +MIME_TYPE +application/x-dosexec FILE_STATE_REMOVE oDwT1BbzjM1, 1022920, 0 [orig_h=192.168.72.14, orig_p=3254/tcp, resp_h=65.54.95.206, resp_p=80/tcp] @@ -9,8 +9,8 @@ total bytes: 1022920 source: HTTP FILE_NEW oDwT1BbzjM1, 0, 0 -FILE_TYPE -mime type is set +MIME_TYPE +application/octet-stream FILE_TIMEOUT FILE_TIMEOUT FILE_STATE_REMOVE diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.remove_action/get.out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.remove_action/get.out index d3ba15b958..846363961e 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.remove_action/get.out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.remove_action/get.out @@ -2,8 +2,8 @@ FILE_NEW Cx92a0ym5R8, 0, 0 FILE_BOF_BUFFER ^J0.26 | 201 -FILE_TYPE -mime type is set +MIME_TYPE +text/plain FILE_STATE_REMOVE Cx92a0ym5R8, 4705, 0 [orig_h=141.142.228.5, orig_p=59856/tcp, resp_h=192.150.187.43, resp_p=80/tcp] diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.stop/get.out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.stop/get.out index 3d082df87d..6000f36f85 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.stop/get.out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.bifs.stop/get.out @@ -2,5 +2,5 @@ FILE_NEW Cx92a0ym5R8, 0, 0 
FILE_BOF_BUFFER ^J0.26 | 201 -FILE_TYPE -mime type is set +MIME_TYPE +text/plain diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.ftp/out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.ftp/out index a3fa989e49..2e1907c91c 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.ftp/out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.ftp/out @@ -2,8 +2,8 @@ FILE_NEW sidhzrR4IT8, 0, 0 FILE_BOF_BUFFER The Nationa -FILE_TYPE -mime type is set +MIME_TYPE +text/x-pascal FILE_STATE_REMOVE sidhzrR4IT8, 16557, 0 [orig_h=141.142.228.5, orig_p=50737/tcp, resp_h=141.142.192.162, resp_p=38141/tcp] diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.get/get-gzip.out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.get/get-gzip.out index 297edfc767..c425a7301f 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.get/get-gzip.out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.get/get-gzip.out @@ -2,8 +2,8 @@ FILE_NEW kg59rqyYxN, 0, 0 FILE_BOF_BUFFER {^J "origin -FILE_TYPE -mime type is set +MIME_TYPE +text/plain FILE_STATE_REMOVE kg59rqyYxN, 197, 0 [orig_h=141.142.228.5, orig_p=50153/tcp, resp_h=54.243.118.187, resp_p=80/tcp] diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.get/get.out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.get/get.out index 801fd2bd6c..ebac8b624e 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.get/get.out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.get/get.out @@ -2,8 +2,8 @@ FILE_NEW Cx92a0ym5R8, 0, 0 FILE_BOF_BUFFER ^J0.26 | 201 -FILE_TYPE -mime type is set +MIME_TYPE +text/plain FILE_STATE_REMOVE Cx92a0ym5R8, 4705, 0 [orig_h=141.142.228.5, orig_p=59856/tcp, resp_h=192.150.187.43, resp_p=80/tcp] diff --git 
a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/a.out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/a.out index 61c164c81b..02435aaca7 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/a.out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/a.out @@ -1,7 +1,7 @@ FILE_NEW 7gZBKVUgy4l, 0, 0 -FILE_TYPE -mime type is set +MIME_TYPE +application/pdf FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE 7gZBKVUgy4l, 555523, 0 diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/b.out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/b.out index 4d0c0a77ae..8ea01332c8 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/b.out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/b.out @@ -1,7 +1,7 @@ FILE_NEW oDwT1BbzjM1, 0, 0 -FILE_TYPE -mime type is set +MIME_TYPE +application/x-dosexec FILE_STATE_REMOVE oDwT1BbzjM1, 1022920, 0 [orig_h=192.168.72.14, orig_p=3254/tcp, resp_h=65.54.95.206, resp_p=80/tcp] @@ -9,8 +9,8 @@ total bytes: 1022920 source: HTTP FILE_NEW oDwT1BbzjM1, 0, 0 -FILE_TYPE -mime type is set +MIME_TYPE +application/octet-stream FILE_TIMEOUT FILE_STATE_REMOVE oDwT1BbzjM1, 206024, 0 diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/c.out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/c.out index a0d1a21327..1ad4f52f36 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/c.out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.partial-content/c.out @@ -1,7 +1,7 @@ FILE_NEW uHS14uhRKGe, 0, 0 -FILE_TYPE -mime type is set +MIME_TYPE +application/octet-stream FILE_OVER_NEW_CONNECTION FILE_STATE_REMOVE uHS14uhRKGe, 498702, 0 
diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.pipeline/out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.pipeline/out index ba2c318fa1..67d7c5d555 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.pipeline/out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.pipeline/out @@ -2,8 +2,8 @@ FILE_NEW aFQKI8SPOL2, 0, 0 FILE_BOF_BUFFER /*^J******** -FILE_TYPE -mime type is set +MIME_TYPE +text/plain FILE_STATE_REMOVE aFQKI8SPOL2, 2675, 0 [orig_h=192.168.1.104, orig_p=1673/tcp, resp_h=63.245.209.11, resp_p=80/tcp] @@ -15,8 +15,8 @@ FILE_NEW CCU3vUEr06l, 0, 0 FILE_BOF_BUFFER //-- Google -FILE_TYPE -mime type is set +MIME_TYPE +text/plain FILE_STATE_REMOVE CCU3vUEr06l, 21421, 0 [orig_h=192.168.1.104, orig_p=1673/tcp, resp_h=63.245.209.11, resp_p=80/tcp] @@ -28,8 +28,8 @@ FILE_NEW HCzA0dVwDPj, 0, 0 FILE_BOF_BUFFER GIF89a^D\0^D\0\xb3 -FILE_TYPE -mime type is set +MIME_TYPE +image/gif FILE_STATE_REMOVE HCzA0dVwDPj, 94, 0 [orig_h=192.168.1.104, orig_p=1673/tcp, resp_h=63.245.209.11, resp_p=80/tcp] @@ -42,8 +42,8 @@ FILE_NEW a1Zu1fteVEf, 0, 0 FILE_BOF_BUFFER \x89PNG^M^J^Z^J\0\0\0 -FILE_TYPE -mime type is set +MIME_TYPE +image/png FILE_STATE_REMOVE a1Zu1fteVEf, 2349, 0 [orig_h=192.168.1.104, orig_p=1673/tcp, resp_h=63.245.209.11, resp_p=80/tcp] @@ -56,8 +56,8 @@ FILE_NEW xXlF7wFdsR, 0, 0 FILE_BOF_BUFFER \x89PNG^M^J^Z^J\0\0\0 -FILE_TYPE -mime type is set +MIME_TYPE +image/png FILE_STATE_REMOVE xXlF7wFdsR, 27579, 0 [orig_h=192.168.1.104, orig_p=1673/tcp, resp_h=63.245.209.11, resp_p=80/tcp] diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.post/out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.post/out index 3f0146eea7..0eecaf994a 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.post/out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.http.post/out @@ -2,8 +2,8 @@ FILE_NEW 
v5HLI7MxPQh, 0, 0 FILE_BOF_BUFFER hello world -FILE_TYPE -mime type is set +MIME_TYPE +text/plain FILE_STATE_REMOVE v5HLI7MxPQh, 11, 0 [orig_h=141.142.228.5, orig_p=53595/tcp, resp_h=54.243.55.129, resp_p=80/tcp] @@ -16,8 +16,8 @@ FILE_NEW PZS1XGHkIf1, 0, 0 FILE_BOF_BUFFER {^J "origin -FILE_TYPE -mime type is set +MIME_TYPE +text/plain FILE_STATE_REMOVE PZS1XGHkIf1, 366, 0 [orig_h=141.142.228.5, orig_p=53595/tcp, resp_h=54.243.55.129, resp_p=80/tcp] diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.input.basic/bro..stdout b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.input.basic/bro..stdout index d3845e39db..a7d837475f 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.input.basic/bro..stdout +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.input.basic/bro..stdout @@ -2,8 +2,8 @@ FILE_NEW nYgPNGLrZf9, 0, 0 FILE_BOF_BUFFER #separator -FILE_TYPE -mime type is set +MIME_TYPE +text/plain FILE_STATE_REMOVE nYgPNGLrZf9, 311, 0 source: ../input.log diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.irc/out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.irc/out index ce5fd67778..03b0c9e932 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.irc/out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.irc/out @@ -2,8 +2,8 @@ FILE_NEW wqKMAamJVSb, 0, 0 FILE_BOF_BUFFER PK^C^D^T\0\0\0^H\0\xae -FILE_TYPE -mime type is set +MIME_TYPE +application/zip FILE_STATE_REMOVE wqKMAamJVSb, 42208, 0 [orig_h=192.168.1.77, orig_p=57655/tcp, resp_h=209.197.168.151, resp_p=1024/tcp] diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.logging/file_analysis.log b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.logging/file_analysis.log index c67b9125f5..dba789ab13 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.logging/file_analysis.log +++ 
b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.logging/file_analysis.log @@ -3,8 +3,8 @@ #empty_field (empty) #unset_field - #path file_analysis -#open 2013-04-11-19-37-28 +#open 2013-04-12-14-46-48 #fields id parent_id source last_active seen_bytes total_bytes missing_bytes overflow_bytes timeout_interval bof_buffer_size mime_type timedout conn_uids analyzers extracted_files md5 sha1 sha256 #types string string string time count count count count interval count string bool table[string] table[enum] table[string] string string string -Cx92a0ym5R8 - HTTP 1362692527.009775 4705 4705 0 0 120.000000 1024 set F UWkUyAuUGXf FileAnalysis::ANALYZER_SHA1,FileAnalysis::ANALYZER_EXTRACT,FileAnalysis::ANALYZER_DATA_EVENT,FileAnalysis::ANALYZER_MD5,FileAnalysis::ANALYZER_SHA256 Cx92a0ym5R8-file 397168fd09991a0e712254df7bc639ac 1dd7ac0398df6cbc0696445a91ec681facf4dc47 4e7c7ef0984119447e743e3ec77e1de52713e345cde03fe7df753a35849bed18 -#close 2013-04-11-19-37-28 +Cx92a0ym5R8 - HTTP 1362692527.009775 4705 4705 0 0 120.000000 1024 text/plain F UWkUyAuUGXf FileAnalysis::ANALYZER_SHA1,FileAnalysis::ANALYZER_EXTRACT,FileAnalysis::ANALYZER_DATA_EVENT,FileAnalysis::ANALYZER_MD5,FileAnalysis::ANALYZER_SHA256 Cx92a0ym5R8-file 397168fd09991a0e712254df7bc639ac 1dd7ac0398df6cbc0696445a91ec681facf4dc47 4e7c7ef0984119447e743e3ec77e1de52713e345cde03fe7df753a35849bed18 +#close 2013-04-12-14-46-48 diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.smtp/out b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.smtp/out index 188b010b35..83dc60f18e 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.smtp/out +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.smtp/out @@ -2,8 +2,8 @@ FILE_NEW cwR7l6Zctxb, 0, 0 FILE_BOF_BUFFER Hello^M^J^M^J ^M -FILE_TYPE -mime type is set +MIME_TYPE +text/plain FILE_STATE_REMOVE cwR7l6Zctxb, 79, 0 [orig_h=10.10.1.4, orig_p=1470/tcp, resp_h=74.53.140.153, resp_p=25/tcp] @@ -15,8 +15,8 
@@ FILE_NEW ZAOEQmRyxv1, 0, 0 FILE_BOF_BUFFER - - - - - - - - (empty) - - - - ftp-item-Rqjkzoroau4-0.dat -1329843193.984222 k6kgXLOoSKl 141.142.220.235 59378 199.233.217.249 56667 - - - - - - - - (empty) - - - - ftp-item-BTsa70Ua9x7-1.dat -1329843179.926563 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test RETR ftp://199.233.217.249/./robots.txt text/plain ASCII text 77 226 Transfer complete. (empty) - - - - - -1329843194.040188 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test PORT 141,142,220,235,131,46 - - - 200 PORT command successful. (empty) F 199.233.217.249 141.142.220.235 33582 - -1329843194.095782 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test LIST - - - - 226 Transfer complete. (empty) - - - - - -1329843197.672179 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test PORT 141,142,220,235,147,203 - - - 200 PORT command successful. (empty) F 199.233.217.249 141.142.220.235 37835 - -1329843199.968212 nQcgTWjvg4c 199.233.217.249 61920 141.142.220.235 33582 - - - - - - - - (empty) - - - - ftp-item-VLQvJybrm38-2.dat -1329843197.727769 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test RETR ftp://199.233.217.249/./robots.txt text/plain ASCII text, with CRLF line terminators 77 226 Transfer complete. 
(empty) - - - - - -1329843200.079930 j4u32Pc5bif 199.233.217.249 61918 141.142.220.235 37835 - - - - - - - - (empty) - - - - ftp-item-zrfwSs9K1yk-3.dat -#close 2013-03-27-17-47-03 +#open 2013-04-12-16-32-25 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p user password command arg mime_type file_size reply_code reply_msg tags data_channel.passive data_channel.orig_h data_channel.resp_h data_channel.resp_p extraction_file +#types time string addr port addr port string string string string string count count string table[string] bool addr addr port string +1329843175.680248 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test PASV - - - 227 Entering Passive Mode (199,233,217,249,221,90) (empty) T 141.142.220.235 199.233.217.249 56666 - +1329843175.791528 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test LIST - - - 226 Transfer complete. (empty) - - - - - +1329843179.815947 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test PASV - - - 227 Entering Passive Mode (199,233,217,249,221,91) (empty) T 141.142.220.235 199.233.217.249 56667 - +1329843193.984222 arKYeMETxOg 141.142.220.235 37604 199.233.217.249 56666 - - - - - - - (empty) - - - - ftp-item-Rqjkzoroau4-0.dat +1329843193.984222 k6kgXLOoSKl 141.142.220.235 59378 199.233.217.249 56667 - - - - - - - (empty) - - - - ftp-item-BTsa70Ua9x7-1.dat +1329843179.926563 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test RETR ftp://199.233.217.249/./robots.txt text/plain 77 226 Transfer complete. (empty) - - - - - +1329843194.040188 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test PORT 141,142,220,235,131,46 - - 200 PORT command successful. (empty) F 199.233.217.249 141.142.220.235 33582 - +1329843194.095782 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test LIST - - - 226 Transfer complete. 
(empty) - - - - - +1329843197.672179 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test PORT 141,142,220,235,147,203 - - 200 PORT command successful. (empty) F 199.233.217.249 141.142.220.235 37835 - +1329843199.968212 nQcgTWjvg4c 199.233.217.249 61920 141.142.220.235 33582 - - - - - - - (empty) - - - - ftp-item-VLQvJybrm38-2.dat +1329843197.727769 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test RETR ftp://199.233.217.249/./robots.txt text/plain 77 226 Transfer complete. (empty) - - - - - +1329843200.079930 j4u32Pc5bif 199.233.217.249 61918 141.142.220.235 37835 - - - - - - - (empty) - - - - ftp-item-zrfwSs9K1yk-3.dat +#close 2013-04-12-16-32-25 diff --git a/testing/btest/Baseline/scripts.base.protocols.ftp.ftp-ipv4/ftp.log b/testing/btest/Baseline/scripts.base.protocols.ftp.ftp-ipv4/ftp.log index 6b05d924d3..afa4c97830 100644 --- a/testing/btest/Baseline/scripts.base.protocols.ftp.ftp-ipv4/ftp.log +++ b/testing/btest/Baseline/scripts.base.protocols.ftp.ftp-ipv4/ftp.log @@ -3,13 +3,13 @@ #empty_field (empty) #unset_field - #path ftp -#open 2013-03-27-17-47-22 -#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p user password command arg mime_type mime_desc file_size reply_code reply_msg tags data_channel.passive data_channel.orig_h data_channel.resp_h data_channel.resp_p extraction_file -#types time string addr port addr port string string string string string string count count string table[string] bool addr addr port string -1329843175.680248 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test PASV - - - - 227 Entering Passive Mode (199,233,217,249,221,90) (empty) T 141.142.220.235 199.233.217.249 56666 - -1329843179.815947 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test PASV - - - - 227 Entering Passive Mode (199,233,217,249,221,91) (empty) T 141.142.220.235 199.233.217.249 56667 - -1329843179.926563 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test RETR 
ftp://199.233.217.249/./robots.txt text/plain ASCII text 77 226 Transfer complete. (empty) - - - - - -1329843194.040188 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test PORT 141,142,220,235,131,46 - - - 200 PORT command successful. (empty) F 199.233.217.249 141.142.220.235 33582 - -1329843197.672179 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test PORT 141,142,220,235,147,203 - - - 200 PORT command successful. (empty) F 199.233.217.249 141.142.220.235 37835 - -1329843197.727769 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test RETR ftp://199.233.217.249/./robots.txt text/plain ASCII text, with CRLF line terminators 77 226 Transfer complete. (empty) - - - - - -#close 2013-03-27-17-47-22 +#open 2013-04-12-16-32-25 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p user password command arg mime_type file_size reply_code reply_msg tags data_channel.passive data_channel.orig_h data_channel.resp_h data_channel.resp_p extraction_file +#types time string addr port addr port string string string string string count count string table[string] bool addr addr port string +1329843175.680248 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test PASV - - - 227 Entering Passive Mode (199,233,217,249,221,90) (empty) T 141.142.220.235 199.233.217.249 56666 - +1329843179.815947 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test PASV - - - 227 Entering Passive Mode (199,233,217,249,221,91) (empty) T 141.142.220.235 199.233.217.249 56667 - +1329843179.926563 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test RETR ftp://199.233.217.249/./robots.txt text/plain 77 226 Transfer complete. (empty) - - - - - +1329843194.040188 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test PORT 141,142,220,235,131,46 - - 200 PORT command successful. 
(empty) F 199.233.217.249 141.142.220.235 33582 - +1329843197.672179 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test PORT 141,142,220,235,147,203 - - 200 PORT command successful. (empty) F 199.233.217.249 141.142.220.235 37835 - +1329843197.727769 UWkUyAuUGXf 141.142.220.235 50003 199.233.217.249 21 anonymous test RETR ftp://199.233.217.249/./robots.txt text/plain 77 226 Transfer complete. (empty) - - - - - +#close 2013-04-12-16-32-25 diff --git a/testing/btest/Baseline/scripts.base.protocols.ftp.ftp-ipv6/ftp.log b/testing/btest/Baseline/scripts.base.protocols.ftp.ftp-ipv6/ftp.log index ea0c07a0c9..85207806c4 100644 --- a/testing/btest/Baseline/scripts.base.protocols.ftp.ftp-ipv6/ftp.log +++ b/testing/btest/Baseline/scripts.base.protocols.ftp.ftp-ipv6/ftp.log @@ -3,14 +3,14 @@ #empty_field (empty) #unset_field - #path ftp -#open 2013-03-27-17-50-35 -#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p user password command arg mime_type mime_desc file_size reply_code reply_msg tags data_channel.passive data_channel.orig_h data_channel.resp_h data_channel.resp_p extraction_file -#types time string addr port addr port string string string string string string count count string table[string] bool addr addr port string -1329327783.207785 UWkUyAuUGXf 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49185 2001:470:4867:99::21 21 anonymous test EPSV - - - - 229 Entering Extended Passive Mode (|||57086|) (empty) T 2001:470:1f11:81f:c999:d94:aa7c:2e3e 2001:470:4867:99::21 57086 - -1329327786.415755 UWkUyAuUGXf 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49185 2001:470:4867:99::21 21 anonymous test EPSV - - - - 229 Entering Extended Passive Mode (|||57087|) (empty) T 2001:470:1f11:81f:c999:d94:aa7c:2e3e 2001:470:4867:99::21 57087 - -1329327787.180814 UWkUyAuUGXf 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49185 2001:470:4867:99::21 21 anonymous test EPSV - - - - 229 Entering Extended Passive Mode (|||57088|) (empty) T 2001:470:1f11:81f:c999:d94:aa7c:2e3e 2001:470:4867:99::21 
57088 - -1329327787.396984 UWkUyAuUGXf 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49185 2001:470:4867:99::21 21 anonymous test RETR ftp://[2001:470:4867:99::21]/robots.txt - - 77 226 Transfer complete. (empty) - - - - - -1329327795.355248 UWkUyAuUGXf 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49185 2001:470:4867:99::21 21 anonymous test EPRT |2|2001:470:1f11:81f:c999:d94:aa7c:2e3e|49189| - - - 200 EPRT command successful. (empty) F 2001:470:4867:99::21 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49189 - -1329327795.463946 UWkUyAuUGXf 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49185 2001:470:4867:99::21 21 anonymous test RETR ftp://[2001:470:4867:99::21]/robots.txt - - 77 226 Transfer complete. (empty) - - - - - -1329327799.799327 UWkUyAuUGXf 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49185 2001:470:4867:99::21 21 anonymous test EPRT |2|2001:470:1f11:81f:c999:d94:aa7c:2e3e|49190| - - - 200 EPRT command successful. (empty) F 2001:470:4867:99::21 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49190 - -#close 2013-03-27-17-50-35 +#open 2013-04-12-16-32-25 +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p user password command arg mime_type file_size reply_code reply_msg tags data_channel.passive data_channel.orig_h data_channel.resp_h data_channel.resp_p extraction_file +#types time string addr port addr port string string string string string count count string table[string] bool addr addr port string +1329327783.207785 UWkUyAuUGXf 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49185 2001:470:4867:99::21 21 anonymous test EPSV - - - 229 Entering Extended Passive Mode (|||57086|) (empty) T 2001:470:1f11:81f:c999:d94:aa7c:2e3e 2001:470:4867:99::21 57086 - +1329327786.415755 UWkUyAuUGXf 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49185 2001:470:4867:99::21 21 anonymous test EPSV - - - 229 Entering Extended Passive Mode (|||57087|) (empty) T 2001:470:1f11:81f:c999:d94:aa7c:2e3e 2001:470:4867:99::21 57087 - +1329327787.180814 UWkUyAuUGXf 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49185 2001:470:4867:99::21 21 anonymous test EPSV 
- - - 229 Entering Extended Passive Mode (|||57088|) (empty) T 2001:470:1f11:81f:c999:d94:aa7c:2e3e 2001:470:4867:99::21 57088 - +1329327787.396984 UWkUyAuUGXf 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49185 2001:470:4867:99::21 21 anonymous test RETR ftp://[2001:470:4867:99::21]/robots.txt - 77 226 Transfer complete. (empty) - - - - - +1329327795.355248 UWkUyAuUGXf 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49185 2001:470:4867:99::21 21 anonymous test EPRT |2|2001:470:1f11:81f:c999:d94:aa7c:2e3e|49189| - - 200 EPRT command successful. (empty) F 2001:470:4867:99::21 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49189 - +1329327795.463946 UWkUyAuUGXf 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49185 2001:470:4867:99::21 21 anonymous test RETR ftp://[2001:470:4867:99::21]/robots.txt - 77 226 Transfer complete. (empty) - - - - - +1329327799.799327 UWkUyAuUGXf 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49185 2001:470:4867:99::21 21 anonymous test EPRT |2|2001:470:1f11:81f:c999:d94:aa7c:2e3e|49190| - - 200 EPRT command successful. 
(empty) F 2001:470:4867:99::21 2001:470:1f11:81f:c999:d94:aa7c:2e3e 49190 - +#close 2013-04-12-16-32-25 diff --git a/testing/btest/Baseline/scripts.base.protocols.http.100-continue/http.log b/testing/btest/Baseline/scripts.base.protocols.http.100-continue/http.log index f6920ac6b3..8053b3a287 100644 --- a/testing/btest/Baseline/scripts.base.protocols.http.100-continue/http.log +++ b/testing/btest/Baseline/scripts.base.protocols.http.100-continue/http.log @@ -6,5 +6,5 @@ #open 2013-03-22-14-38-28 #fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied mime_type md5 extraction_file #types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] string string string -1237440095.634312 UWkUyAuUGXf 192.168.3.103 54102 128.146.216.51 80 1 POST www.osu.edu / - curl/7.17.1 (i386-apple-darwin8.11.1) libcurl/7.17.1 zlib/1.2.3 2001 60731 200 OK 100 Continue - (empty) - - - text/html; charset=us-ascii - - +1237440095.634312 UWkUyAuUGXf 192.168.3.103 54102 128.146.216.51 80 1 POST www.osu.edu / - curl/7.17.1 (i386-apple-darwin8.11.1) libcurl/7.17.1 zlib/1.2.3 2001 60731 200 OK 100 Continue - (empty) - - - text/html - - #close 2013-03-22-14-38-28 diff --git a/testing/btest/Baseline/scripts.base.protocols.http.http-extract-files/http.log b/testing/btest/Baseline/scripts.base.protocols.http.http-extract-files/http.log index f42a66f796..789896072f 100644 --- a/testing/btest/Baseline/scripts.base.protocols.http.http-extract-files/http.log +++ b/testing/btest/Baseline/scripts.base.protocols.http.http-extract-files/http.log @@ -6,5 +6,5 @@ #open 2013-03-22-14-38-28 #fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg 
info_code info_msg filename tags username password proxied mime_type md5 extraction_file #types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] string string string -1128727435.634189 arKYeMETxOg 141.42.64.125 56730 125.190.109.199 80 1 GET www.icir.org / - Wget/1.10 0 9130 200 OK - - - (empty) - - - text/html; charset=us-ascii - http-item-BFymS6bFgT3-0.dat +1128727435.634189 arKYeMETxOg 141.42.64.125 56730 125.190.109.199 80 1 GET www.icir.org / - Wget/1.10 0 9130 200 OK - - - (empty) - - - text/html - http-item-BFymS6bFgT3-0.dat #close 2013-03-22-14-38-28 diff --git a/testing/btest/Baseline/scripts.base.protocols.http.http-methods/http.log b/testing/btest/Baseline/scripts.base.protocols.http.http-methods/http.log index bdc29bd6b9..9dafcc74e0 100644 --- a/testing/btest/Baseline/scripts.base.protocols.http.http-methods/http.log +++ b/testing/btest/Baseline/scripts.base.protocols.http.http-methods/http.log @@ -6,53 +6,53 @@ #open 2013-03-25-20-20-22 #fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied mime_type md5 extraction_file #types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] string string string -1354328870.191989 UWkUyAuUGXf 128.2.6.136 46562 173.194.75.103 80 1 OPTIONS www.google.com * - - 0 962 405 Method Not Allowed - - - (empty) - - - text/html; charset=utf-8 - - -1354328874.237327 arKYeMETxOg 128.2.6.136 46563 173.194.75.103 80 1 OPTIONS www.google.com HTTP/1.1 - - 0 925 400 Bad Request - - - (empty) - - - text/html; charset=utf-8 - - -1354328874.299063 k6kgXLOoSKl 128.2.6.136 46564 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html; 
charset=utf-8 - - -1354328874.342591 nQcgTWjvg4c 128.2.6.136 46565 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html; charset=utf-8 - - -1354328874.364020 j4u32Pc5bif 128.2.6.136 46566 173.194.75.103 80 1 GET www.google.com / - - 0 43911 200 OK - - - (empty) - - - text/html; charset=us-ascii - - -1354328878.470424 TEfuqmmG4bh 128.2.6.136 46567 173.194.75.103 80 1 GET www.google.com / - - 0 43983 200 OK - - - (empty) - - - text/html; charset=us-ascii - - -1354328882.575456 FrJExwHcSal 128.2.6.136 46568 173.194.75.103 80 1 GET www.google.com /HTTP/1.1 - - 0 1207 403 Forbidden - - - (empty) - - - text/html; charset=us-ascii - - -1354328882.928027 5OKnoww6xl4 128.2.6.136 46569 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html; charset=utf-8 - - -1354328882.968948 3PKsZ2Uye21 128.2.6.136 46570 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html; charset=utf-8 - - -1354328882.990373 VW0XPVINV8a 128.2.6.136 46571 173.194.75.103 80 1 GET www.google.com / - - 0 43913 200 OK - - - (empty) - - - text/html; charset=us-ascii - - -1354328887.114613 fRFu0wcOle6 128.2.6.136 46572 173.194.75.103 80 0 - - - - - 0 961 405 Method Not Allowed - - - (empty) - - - text/html; charset=utf-8 - - -1354328891.161077 qSsw6ESzHV4 128.2.6.136 46573 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html; charset=utf-8 - - -1354328891.204740 iE6yhOq3SF 128.2.6.136 46574 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html; charset=utf-8 - - -1354328891.245592 GSxOnSLghOa 128.2.6.136 46575 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html; charset=utf-8 - - -1354328891.287655 qCaWGmzFtM5 128.2.6.136 46576 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html; charset=utf-8 - - -1354328891.309065 70MGiRM1Qf4 128.2.6.136 46577 173.194.75.103 80 1 CCM_POST www.google.com / - - 0 963 405 Method 
Not Allowed - - - (empty) - - - text/html; charset=utf-8 - - -1354328895.355012 h5DsfNtYzi1 128.2.6.136 46578 173.194.75.103 80 1 CCM_POST www.google.com /HTTP/1.1 - - 0 925 400 Bad Request - - - (empty) - - - text/html; charset=utf-8 - - -1354328895.416133 P654jzLoe3a 128.2.6.136 46579 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html; charset=utf-8 - - -1354328895.459490 Tw8jXtpTGu6 128.2.6.136 46580 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html; charset=utf-8 - - -1354328895.480865 c4Zw9TmAE05 128.2.6.136 46581 173.194.75.103 80 1 CCM_POST www.google.com / - - 0 963 405 Method Not Allowed - - - (empty) - - - text/html; charset=utf-8 - - -1354328899.526682 EAr0uf4mhq 128.2.6.136 46582 173.194.75.103 80 1 CONNECT www.google.com / - - 0 925 400 Bad Request - - - (empty) - - - text/html; charset=utf-8 - - -1354328903.572533 GvmoxJFXdTa 128.2.6.136 46583 173.194.75.103 80 1 CONNECT www.google.com /HTTP/1.1 - - 0 925 400 Bad Request - - - (empty) - - - text/html; charset=utf-8 - - -1354328903.634196 0Q4FH8sESw5 128.2.6.136 46584 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html; charset=utf-8 - - -1354328903.676395 slFea8xwSmb 128.2.6.136 46585 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html; charset=utf-8 - - -1354328903.697693 UfGkYA2HI2g 128.2.6.136 46586 173.194.75.103 80 1 CONNECT www.google.com / - - 0 925 400 Bad Request - - - (empty) - - - text/html; charset=utf-8 - - -1354328907.743696 i2rO3KD1Syg 128.2.6.136 46587 173.194.75.103 80 1 TRACE www.google.com / - - 0 960 405 Method Not Allowed - - - (empty) - - - text/html; charset=utf-8 - - -1354328911.790590 2cx26uAvUPl 128.2.6.136 46588 173.194.75.103 80 1 TRACE www.google.com /HTTP/1.1 - - 0 925 400 Bad Request - - - (empty) - - - text/html; charset=utf-8 - - -1354328911.853464 BWaU4aSuwkc 128.2.6.136 46589 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - 
text/html; charset=utf-8 - - -1354328911.897044 10XodEwRycf 128.2.6.136 46590 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html; charset=utf-8 - - -1354328911.918511 zno26fFZkrh 128.2.6.136 46591 173.194.75.103 80 1 TRACE www.google.com / - - 0 960 405 Method Not Allowed - - - (empty) - - - text/html; charset=utf-8 - - -1354328915.964678 v5rgkJBig5l 128.2.6.136 46592 173.194.75.103 80 1 DELETE www.google.com / - - 0 961 405 Method Not Allowed - - - (empty) - - - text/html; charset=utf-8 - - -1354328920.010458 eWZCH7OONC1 128.2.6.136 46593 173.194.75.103 80 1 DELETE www.google.com /HTTP/1.1 - - 0 925 400 Bad Request - - - (empty) - - - text/html; charset=utf-8 - - -1354328920.072101 0Pwk3ntf8O3 128.2.6.136 46594 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html; charset=utf-8 - - -1354328920.114526 0HKorjr8Zp7 128.2.6.136 46595 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html; charset=utf-8 - - -1354328920.136714 yC2d6kVg709 128.2.6.136 46596 173.194.75.103 80 1 DELETE www.google.com / - - 0 961 405 Method Not Allowed - - - (empty) - - - text/html; charset=utf-8 - - -1354328924.183211 VcgagLjnO92 128.2.6.136 46597 173.194.75.103 80 1 PUT www.google.com / - - 0 934 411 Length Required - - - (empty) - - - text/html; charset=utf-8 - - -1354328924.224567 bdRoHfaPBo3 128.2.6.136 46598 173.194.75.103 80 1 PUT www.google.com /HTTP/1.1 - - 0 934 411 Length Required - - - (empty) - - - text/html; charset=utf-8 - - -1354328924.287402 zHqb7t7kv28 128.2.6.136 46599 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html; charset=utf-8 - - -1354328924.328257 rrZWoMUQpv8 128.2.6.136 46600 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html; charset=utf-8 - - -1354328924.350343 xNYSS2hJkle 128.2.6.136 46601 173.194.75.103 80 1 PUT www.google.com / - - 0 934 411 Length Required - - - (empty) - - - text/html; charset=utf-8 - - 
-1354328924.391728 vMVjlplKKbd 128.2.6.136 46602 173.194.75.103 80 1 POST www.google.com / - - 0 934 411 Length Required - - - (empty) - - - text/html; charset=utf-8 - - -1354328924.433150 3omNawSNrxj 128.2.6.136 46603 173.194.75.103 80 1 POST www.google.com /HTTP/1.1 - - 0 934 411 Length Required - - - (empty) - - - text/html; charset=utf-8 - - -1354328924.496732 Rv8AJVfi9Zi 128.2.6.136 46604 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html; charset=utf-8 - - -1354328924.537671 wEyF3OvvcQe 128.2.6.136 46605 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html; charset=utf-8 - - +1354328870.191989 UWkUyAuUGXf 128.2.6.136 46562 173.194.75.103 80 1 OPTIONS www.google.com * - - 0 962 405 Method Not Allowed - - - (empty) - - - text/html - - +1354328874.237327 arKYeMETxOg 128.2.6.136 46563 173.194.75.103 80 1 OPTIONS www.google.com HTTP/1.1 - - 0 925 400 Bad Request - - - (empty) - - - text/html - - +1354328874.299063 k6kgXLOoSKl 128.2.6.136 46564 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - +1354328874.342591 nQcgTWjvg4c 128.2.6.136 46565 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - +1354328874.364020 j4u32Pc5bif 128.2.6.136 46566 173.194.75.103 80 1 GET www.google.com / - - 0 43911 200 OK - - - (empty) - - - text/html - - +1354328878.470424 TEfuqmmG4bh 128.2.6.136 46567 173.194.75.103 80 1 GET www.google.com / - - 0 43983 200 OK - - - (empty) - - - text/html - - +1354328882.575456 FrJExwHcSal 128.2.6.136 46568 173.194.75.103 80 1 GET www.google.com /HTTP/1.1 - - 0 1207 403 Forbidden - - - (empty) - - - text/html - - +1354328882.928027 5OKnoww6xl4 128.2.6.136 46569 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - +1354328882.968948 3PKsZ2Uye21 128.2.6.136 46570 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - +1354328882.990373 VW0XPVINV8a 128.2.6.136 
46571 173.194.75.103 80 1 GET www.google.com / - - 0 43913 200 OK - - - (empty) - - - text/html - - +1354328887.114613 fRFu0wcOle6 128.2.6.136 46572 173.194.75.103 80 0 - - - - - 0 961 405 Method Not Allowed - - - (empty) - - - text/html - - +1354328891.161077 qSsw6ESzHV4 128.2.6.136 46573 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - +1354328891.204740 iE6yhOq3SF 128.2.6.136 46574 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - +1354328891.245592 GSxOnSLghOa 128.2.6.136 46575 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - +1354328891.287655 qCaWGmzFtM5 128.2.6.136 46576 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - +1354328891.309065 70MGiRM1Qf4 128.2.6.136 46577 173.194.75.103 80 1 CCM_POST www.google.com / - - 0 963 405 Method Not Allowed - - - (empty) - - - text/html - - +1354328895.355012 h5DsfNtYzi1 128.2.6.136 46578 173.194.75.103 80 1 CCM_POST www.google.com /HTTP/1.1 - - 0 925 400 Bad Request - - - (empty) - - - text/html - - +1354328895.416133 P654jzLoe3a 128.2.6.136 46579 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - +1354328895.459490 Tw8jXtpTGu6 128.2.6.136 46580 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - +1354328895.480865 c4Zw9TmAE05 128.2.6.136 46581 173.194.75.103 80 1 CCM_POST www.google.com / - - 0 963 405 Method Not Allowed - - - (empty) - - - text/html - - +1354328899.526682 EAr0uf4mhq 128.2.6.136 46582 173.194.75.103 80 1 CONNECT www.google.com / - - 0 925 400 Bad Request - - - (empty) - - - text/html - - +1354328903.572533 GvmoxJFXdTa 128.2.6.136 46583 173.194.75.103 80 1 CONNECT www.google.com /HTTP/1.1 - - 0 925 400 Bad Request - - - (empty) - - - text/html - - +1354328903.634196 0Q4FH8sESw5 128.2.6.136 46584 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - 
+1354328903.676395 slFea8xwSmb 128.2.6.136 46585 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - +1354328903.697693 UfGkYA2HI2g 128.2.6.136 46586 173.194.75.103 80 1 CONNECT www.google.com / - - 0 925 400 Bad Request - - - (empty) - - - text/html - - +1354328907.743696 i2rO3KD1Syg 128.2.6.136 46587 173.194.75.103 80 1 TRACE www.google.com / - - 0 960 405 Method Not Allowed - - - (empty) - - - text/html - - +1354328911.790590 2cx26uAvUPl 128.2.6.136 46588 173.194.75.103 80 1 TRACE www.google.com /HTTP/1.1 - - 0 925 400 Bad Request - - - (empty) - - - text/html - - +1354328911.853464 BWaU4aSuwkc 128.2.6.136 46589 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - +1354328911.897044 10XodEwRycf 128.2.6.136 46590 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - +1354328911.918511 zno26fFZkrh 128.2.6.136 46591 173.194.75.103 80 1 TRACE www.google.com / - - 0 960 405 Method Not Allowed - - - (empty) - - - text/html - - +1354328915.964678 v5rgkJBig5l 128.2.6.136 46592 173.194.75.103 80 1 DELETE www.google.com / - - 0 961 405 Method Not Allowed - - - (empty) - - - text/html - - +1354328920.010458 eWZCH7OONC1 128.2.6.136 46593 173.194.75.103 80 1 DELETE www.google.com /HTTP/1.1 - - 0 925 400 Bad Request - - - (empty) - - - text/html - - +1354328920.072101 0Pwk3ntf8O3 128.2.6.136 46594 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - +1354328920.114526 0HKorjr8Zp7 128.2.6.136 46595 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - +1354328920.136714 yC2d6kVg709 128.2.6.136 46596 173.194.75.103 80 1 DELETE www.google.com / - - 0 961 405 Method Not Allowed - - - (empty) - - - text/html - - +1354328924.183211 VcgagLjnO92 128.2.6.136 46597 173.194.75.103 80 1 PUT www.google.com / - - 0 934 411 Length Required - - - (empty) - - - text/html - - +1354328924.224567 bdRoHfaPBo3 128.2.6.136 46598 
173.194.75.103 80 1 PUT www.google.com /HTTP/1.1 - - 0 934 411 Length Required - - - (empty) - - - text/html - - +1354328924.287402 zHqb7t7kv28 128.2.6.136 46599 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - +1354328924.328257 rrZWoMUQpv8 128.2.6.136 46600 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - +1354328924.350343 xNYSS2hJkle 128.2.6.136 46601 173.194.75.103 80 1 PUT www.google.com / - - 0 934 411 Length Required - - - (empty) - - - text/html - - +1354328924.391728 vMVjlplKKbd 128.2.6.136 46602 173.194.75.103 80 1 POST www.google.com / - - 0 934 411 Length Required - - - (empty) - - - text/html - - +1354328924.433150 3omNawSNrxj 128.2.6.136 46603 173.194.75.103 80 1 POST www.google.com /HTTP/1.1 - - 0 934 411 Length Required - - - (empty) - - - text/html - - +1354328924.496732 Rv8AJVfi9Zi 128.2.6.136 46604 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - +1354328924.537671 wEyF3OvvcQe 128.2.6.136 46605 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - 1354328924.559704 E490YZTUozc 128.2.6.136 46606 173.194.75.103 80 1 HEAD www.google.com / - - 0 0 200 OK - - - (empty) - - - - - - 1354328928.625437 YIeWJmXWNWj 128.2.6.136 46607 173.194.75.103 80 1 HEAD www.google.com / - - 0 0 200 OK - - - (empty) - - - - - - 1354328932.692706 ydiZblvsYri 128.2.6.136 46608 173.194.75.103 80 1 HEAD www.google.com /HTTP/1.1 - - 0 0 400 Bad Request - - - (empty) - - - - - - -1354328932.754657 HFYOnBqSE5e 128.2.6.136 46609 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html; charset=utf-8 - - -1354328932.796568 JcUvhfWUMgd 128.2.6.136 46610 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html; charset=utf-8 - - +1354328932.754657 HFYOnBqSE5e 128.2.6.136 46609 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - +1354328932.796568 
JcUvhfWUMgd 128.2.6.136 46610 173.194.75.103 80 0 - - - - - 0 925 400 Bad Request - - - (empty) - - - text/html - - #close 2013-03-25-20-20-22 diff --git a/testing/btest/Baseline/scripts.base.protocols.http.http-mime-and-md5/http.log b/testing/btest/Baseline/scripts.base.protocols.http.http-mime-and-md5/http.log index 61b1e16a2f..6073e9b563 100644 --- a/testing/btest/Baseline/scripts.base.protocols.http.http-mime-and-md5/http.log +++ b/testing/btest/Baseline/scripts.base.protocols.http.http-mime-and-md5/http.log @@ -6,9 +6,9 @@ #open 2013-03-22-16-25-59 #fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied mime_type md5 extraction_file #types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] string string string -1258577884.844956 UWkUyAuUGXf 192.168.1.104 1673 63.245.209.11 80 1 GET www.mozilla.org /style/enhanced.css http://www.mozilla.org/projects/calendar/ Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1.5) Gecko/20091102 Firefox/3.5.5 0 2675 200 OK - - - (empty) - - - text/plain; charset=us-ascii - - -1258577884.960135 UWkUyAuUGXf 192.168.1.104 1673 63.245.209.11 80 2 GET www.mozilla.org /script/urchin.js http://www.mozilla.org/projects/calendar/ Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1.5) Gecko/20091102 Firefox/3.5.5 0 21421 200 OK - - - (empty) - - - text/plain; charset=us-ascii - - -1258577885.317160 UWkUyAuUGXf 192.168.1.104 1673 63.245.209.11 80 3 GET www.mozilla.org /images/template/screen/bullet_utility.png http://www.mozilla.org/style/screen.css Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1.5) Gecko/20091102 Firefox/3.5.5 0 94 200 OK - - - (empty) - - - image/gif; charset=binary - - -1258577885.349639 UWkUyAuUGXf 192.168.1.104 1673 63.245.209.11 80 
4 GET www.mozilla.org /images/template/screen/key-point-top.png http://www.mozilla.org/style/screen.css Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1.5) Gecko/20091102 Firefox/3.5.5 0 2349 200 OK - - - (empty) - - - image/png; charset=binary e0029eea80812e9a8e57b8d05d52938a - -1258577885.394612 UWkUyAuUGXf 192.168.1.104 1673 63.245.209.11 80 5 GET www.mozilla.org /projects/calendar/images/header-sunbird.png http://www.mozilla.org/projects/calendar/calendar.css Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1.5) Gecko/20091102 Firefox/3.5.5 0 27579 200 OK - - - (empty) - - - image/png; charset=binary 30aa926344f58019d047e85ba049ca1e - +1258577884.844956 UWkUyAuUGXf 192.168.1.104 1673 63.245.209.11 80 1 GET www.mozilla.org /style/enhanced.css http://www.mozilla.org/projects/calendar/ Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1.5) Gecko/20091102 Firefox/3.5.5 0 2675 200 OK - - - (empty) - - - text/plain - - +1258577884.960135 UWkUyAuUGXf 192.168.1.104 1673 63.245.209.11 80 2 GET www.mozilla.org /script/urchin.js http://www.mozilla.org/projects/calendar/ Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1.5) Gecko/20091102 Firefox/3.5.5 0 21421 200 OK - - - (empty) - - - text/plain - - +1258577885.317160 UWkUyAuUGXf 192.168.1.104 1673 63.245.209.11 80 3 GET www.mozilla.org /images/template/screen/bullet_utility.png http://www.mozilla.org/style/screen.css Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1.5) Gecko/20091102 Firefox/3.5.5 0 94 200 OK - - - (empty) - - - image/gif - - +1258577885.349639 UWkUyAuUGXf 192.168.1.104 1673 63.245.209.11 80 4 GET www.mozilla.org /images/template/screen/key-point-top.png http://www.mozilla.org/style/screen.css Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1.5) Gecko/20091102 Firefox/3.5.5 0 2349 200 OK - - - (empty) - - - image/png e0029eea80812e9a8e57b8d05d52938a - +1258577885.394612 UWkUyAuUGXf 192.168.1.104 1673 63.245.209.11 80 5 GET www.mozilla.org 
/projects/calendar/images/header-sunbird.png http://www.mozilla.org/projects/calendar/calendar.css Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1.5) Gecko/20091102 Firefox/3.5.5 0 27579 200 OK - - - (empty) - - - image/png 30aa926344f58019d047e85ba049ca1e - #close 2013-03-22-16-25-59 diff --git a/testing/btest/Baseline/scripts.base.protocols.smtp.mime-extract/smtp_entities.log b/testing/btest/Baseline/scripts.base.protocols.smtp.mime-extract/smtp_entities.log index 0ee8782362..0051ddba61 100644 --- a/testing/btest/Baseline/scripts.base.protocols.smtp.mime-extract/smtp_entities.log +++ b/testing/btest/Baseline/scripts.base.protocols.smtp.mime-extract/smtp_entities.log @@ -6,7 +6,7 @@ #open 2013-03-26-20-43-14 #fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth filename content_len mime_type md5 extraction_file excerpt #types time string addr port addr port count string count string string string string -1254722770.692743 arKYeMETxOg 10.10.1.4 1470 74.53.140.153 25 1 - 79 text/plain; charset=us-ascii - smtp-entity-cwR7l6Zctxb-0.dat (empty) -1254722770.692743 arKYeMETxOg 10.10.1.4 1470 74.53.140.153 25 1 - 1918 text/html; charset=us-ascii - - (empty) -1254722770.692804 arKYeMETxOg 10.10.1.4 1470 74.53.140.153 25 1 NEWS.txt 10823 text/plain; charset=us-ascii - smtp-entity-Ltd7QO7jEv3-1.dat (empty) +1254722770.692743 arKYeMETxOg 10.10.1.4 1470 74.53.140.153 25 1 - 79 text/plain - smtp-entity-cwR7l6Zctxb-0.dat (empty) +1254722770.692743 arKYeMETxOg 10.10.1.4 1470 74.53.140.153 25 1 - 1918 text/html - - (empty) +1254722770.692804 arKYeMETxOg 10.10.1.4 1470 74.53.140.153 25 1 NEWS.txt 10823 text/plain - smtp-entity-Ltd7QO7jEv3-1.dat (empty) #close 2013-03-26-20-43-14 diff --git a/testing/btest/Baseline/scripts.base.protocols.smtp.mime/smtp_entities.log b/testing/btest/Baseline/scripts.base.protocols.smtp.mime/smtp_entities.log index 0a342c48dc..135c644855 100644 --- a/testing/btest/Baseline/scripts.base.protocols.smtp.mime/smtp_entities.log +++ 
b/testing/btest/Baseline/scripts.base.protocols.smtp.mime/smtp_entities.log @@ -6,7 +6,7 @@ #open 2013-03-26-20-39-07 #fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth filename content_len mime_type md5 extraction_file excerpt #types time string addr port addr port count string count string string string string -1254722770.692743 arKYeMETxOg 10.10.1.4 1470 74.53.140.153 25 1 - 79 text/plain; charset=us-ascii 92bca2e6cdcde73647125da7dccbdd07 - (empty) -1254722770.692743 arKYeMETxOg 10.10.1.4 1470 74.53.140.153 25 1 - 1918 text/html; charset=us-ascii - - (empty) -1254722770.692804 arKYeMETxOg 10.10.1.4 1470 74.53.140.153 25 1 NEWS.txt 10823 text/plain; charset=us-ascii a968bb0f9f9d95835b2e74c845877e87 - (empty) +1254722770.692743 arKYeMETxOg 10.10.1.4 1470 74.53.140.153 25 1 - 79 text/plain 92bca2e6cdcde73647125da7dccbdd07 - (empty) +1254722770.692743 arKYeMETxOg 10.10.1.4 1470 74.53.140.153 25 1 - 1918 text/html - - (empty) +1254722770.692804 arKYeMETxOg 10.10.1.4 1470 74.53.140.153 25 1 NEWS.txt 10823 text/plain a968bb0f9f9d95835b2e74c845877e87 - (empty) #close 2013-03-26-20-39-07 diff --git a/testing/btest/btest.cfg b/testing/btest/btest.cfg index d86b45d8a9..4f888f710b 100644 --- a/testing/btest/btest.cfg +++ b/testing/btest/btest.cfg @@ -7,6 +7,7 @@ IgnoreFiles = *.tmp *.swp #* *.trace .DS_Store [environment] BROPATH=`bash -c %(testbase)s/../../build/bro-path-dev` +BROMAGIC=%(testbase)s/../../magic BRO_SEED_FILE=%(testbase)s/random.seed TZ=UTC LC_ALL=C diff --git a/testing/external/subdir-btest.cfg b/testing/external/subdir-btest.cfg index 1239150208..b631ba2457 100644 --- a/testing/external/subdir-btest.cfg +++ b/testing/external/subdir-btest.cfg @@ -7,6 +7,7 @@ IgnoreFiles = *.tmp *.swp #* *.trace .gitignore *.skeleton [environment] BROPATH=`bash -c %(testbase)s/../../../build/bro-path-dev`:%(testbase)s/../scripts +BROMAGIC=%(testbase)s/../../../magic BRO_SEED_FILE=%(testbase)s/../random.seed TZ=UTC LC_ALL=C @@ -17,4 +18,4 @@ 
TRACES=%(testbase)s/Traces SCRIPTS=%(testbase)s/../scripts DIST=%(testbase)s/../../.. BUILD=%(testbase)s/../../../build -#BRO_PROFILER_FILE=%(testbase)s/.tmp/script-coverage.XXXXXX +BRO_PROFILER_FILE=%(testbase)s/.tmp/script-coverage.XXXXXX diff --git a/testing/scripts/diff-canonifier b/testing/scripts/diff-canonifier index aef9173cc8..3cb213a3f7 100755 --- a/testing/scripts/diff-canonifier +++ b/testing/scripts/diff-canonifier @@ -2,5 +2,4 @@ # # Default canonifier used with the tests in testing/btest/*. -`dirname $0`/diff-remove-timestamps \ - | `dirname $0`/diff-remove-mime-types +`dirname $0`/diff-remove-timestamps diff --git a/testing/scripts/diff-canonifier-external b/testing/scripts/diff-canonifier-external index e788a4a1bb..04ef699538 100755 --- a/testing/scripts/diff-canonifier-external +++ b/testing/scripts/diff-canonifier-external @@ -4,7 +4,6 @@ `dirname $0`/diff-remove-timestamps \ | `dirname $0`/diff-remove-uids \ - | `dirname $0`/diff-remove-mime-types \ | `dirname $0`/diff-remove-x509-names \ | `dirname $0`/diff-canon-notice-policy \ | `dirname $0`/diff-sort diff --git a/testing/scripts/file-analysis-test.bro b/testing/scripts/file-analysis-test.bro index be8b4eadd6..15929dd4f6 100644 --- a/testing/scripts/file-analysis-test.bro +++ b/testing/scripts/file-analysis-test.bro @@ -49,10 +49,8 @@ event file_new(f: fa_file) if ( f?$mime_type ) { - print "FILE_TYPE"; - print "mime type is set"; - # not actually printing the values due to libmagic variances - f$mime_type = "set"; + print "MIME_TYPE"; + print f$mime_type; } } From cd2a6aa33a1df2c16fe93d0554d1e9c10822d660 Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Thu, 18 Apr 2013 18:09:48 -0500 Subject: [PATCH 06/10] FileAnalysis: workarounds for older libmagics. Some of the unit tests revealed different versions of libmagic could give different mime types for the same input file and magic database. 
One way that could happen is because of the use of hardcoded/builtin token (word) comparisons for ascii files -- MAGIC_NO_CHECK_TOKENS flag will prevent that from being used (and it's obsoleted in newer libmagics). The other problem looked like a bug fixed as of 5.05 where a match in the magic database that doesn't have a verbose description but does have a mime type won't actually return that mime type due to the missing description. The one case where that kept popping up was in 5.04 not being able to identify application/x-dosexec, so I added a description to the top-level match for that to work around the issue. --- cmake | 2 +- magic/msdos | 2 +- src/util.cc | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cmake b/cmake index 1bfdacb892..e1a7fd00a0 160000 --- a/cmake +++ b/cmake @@ -1 +1 @@ -Subproject commit 1bfdacb8921ab0b40099f5fde7a611167bf310c3 +Subproject commit e1a7fd00a0a66d6831a239fe84f5fcfaa54e2c35 diff --git a/magic/msdos b/magic/msdos index 6182801f20..59a9d2caac 100644 --- a/magic/msdos +++ b/magic/msdos @@ -20,7 +20,7 @@ # # Many of the compressed formats were extraced from IDARC 1.23 source code. # -0 string/b MZ +0 string/b MZ DOS MZ !:mime application/x-dosexec # All non-DOS EXE extensions have the relocation table more than 0x40 bytes into the file. >0x18 leshort <0x40 MS-DOS executable diff --git a/src/util.cc b/src/util.cc index 0a9b035e96..8d5a404792 100644 --- a/src/util.cc +++ b/src/util.cc @@ -1568,7 +1568,7 @@ void bro_init_magic(magic_t* cookie_ptr, int flags) if ( ! cookie_ptr || *cookie_ptr ) return; - *cookie_ptr = magic_open(flags); + *cookie_ptr = magic_open(flags|MAGIC_NO_CHECK_TOKENS); // Use our custom database for mime types, but the default database // from libmagic for the verbose file type. From 98f7907dbb934475febecb49791bc82346f1d3e3 Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Fri, 19 Apr 2013 11:38:11 -0500 Subject: [PATCH 07/10] FileAnalysis: optimize file handle construction.
cat is slightly faster than fmt. --- scripts/base/protocols/ftp/file-analysis.bro | 2 +- scripts/base/protocols/http/file-analysis.bro | 6 +++--- scripts/base/protocols/irc/file-analysis.bro | 2 +- scripts/base/protocols/smtp/file-analysis.bro | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/scripts/base/protocols/ftp/file-analysis.bro b/scripts/base/protocols/ftp/file-analysis.bro index ca80d53bba..b26d8a942b 100644 --- a/scripts/base/protocols/ftp/file-analysis.bro +++ b/scripts/base/protocols/ftp/file-analysis.bro @@ -11,7 +11,7 @@ export { function get_handle_string(c: connection): string { - return fmt("%s %s %s", ANALYZER_FTP_DATA, c$start_time, id_string(c$id)); + return cat(ANALYZER_FTP_DATA, " ", c$start_time, " ", id_string(c$id)); } function get_file_handle(c: connection, is_orig: bool): string diff --git a/scripts/base/protocols/http/file-analysis.bro b/scripts/base/protocols/http/file-analysis.bro index ae5fe0f979..fc537f3477 100644 --- a/scripts/base/protocols/http/file-analysis.bro +++ b/scripts/base/protocols/http/file-analysis.bro @@ -15,11 +15,11 @@ function get_file_handle(c: connection, is_orig: bool): string if ( ! 
c?$http ) return ""; if ( c$http$range_request ) - return fmt("%s %s %s %s", ANALYZER_HTTP, is_orig, c$id$orig_h, + return cat(ANALYZER_HTTP, " ", is_orig, " ", c$id$orig_h, " ", build_url(c$http)); - return fmt("%s %s %s %s %s", ANALYZER_HTTP, c$start_time, is_orig, - c$http$trans_depth, id_string(c$id)); + return cat(ANALYZER_HTTP, " ", c$start_time, " ", is_orig, " ", + c$http$trans_depth, " ", id_string(c$id)); } module GLOBAL; diff --git a/scripts/base/protocols/irc/file-analysis.bro b/scripts/base/protocols/irc/file-analysis.bro index 858b39fee2..94d9f95d73 100644 --- a/scripts/base/protocols/irc/file-analysis.bro +++ b/scripts/base/protocols/irc/file-analysis.bro @@ -12,7 +12,7 @@ export { function get_file_handle(c: connection, is_orig: bool): string { if ( is_orig ) return ""; - return fmt("%s %s %s", ANALYZER_IRC_DATA, c$start_time, id_string(c$id)); + return cat(ANALYZER_IRC_DATA, " ", c$start_time, " ", id_string(c$id)); } module GLOBAL; diff --git a/scripts/base/protocols/smtp/file-analysis.bro b/scripts/base/protocols/smtp/file-analysis.bro index 85c455ea18..cbe109eff3 100644 --- a/scripts/base/protocols/smtp/file-analysis.bro +++ b/scripts/base/protocols/smtp/file-analysis.bro @@ -13,8 +13,8 @@ export { function get_file_handle(c: connection, is_orig: bool): string { if ( ! c?$smtp ) return ""; - return fmt("%s %s %s %s", ANALYZER_SMTP, c$start_time, - c$smtp$trans_depth, c$smtp_state$mime_level); + return cat(ANALYZER_SMTP, " ", c$start_time, " ", c$smtp$trans_depth, " ", + c$smtp_state$mime_level); } module GLOBAL; From c1f37dde5a5e9a9915c85da4b763b9b4d0c488b2 Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Fri, 19 Apr 2013 11:55:48 -0500 Subject: [PATCH 08/10] FileAnalysis: optimize connection set updating. Don't need to be checking/updating that for sequential data input, which won't be over multiple conns.
--- src/file_analysis/Manager.cc | 9 ++++++--- src/file_analysis/Manager.h | 3 ++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/file_analysis/Manager.cc b/src/file_analysis/Manager.cc index 31d548f4e4..c0de853710 100644 --- a/src/file_analysis/Manager.cc +++ b/src/file_analysis/Manager.cc @@ -65,7 +65,9 @@ void Manager::DataIn(const u_char* data, uint64 len, AnalyzerTag::Tag tag, { if ( IsDisabled(tag) ) return; GetFileHandle(tag, conn, is_orig); - DataIn(data, len, GetFile(current_handle, conn, tag)); + // Sequential data input shouldn't be going over multiple conns, so don't + // do the check to update connection set. + DataIn(data, len, GetFile(current_handle, conn, tag, false)); } void Manager::DataIn(const u_char* data, uint64 len, const string& unique) @@ -186,7 +188,7 @@ bool Manager::RemoveAnalyzer(const FileID& file_id, const RecordVal* args) const } File* Manager::GetFile(const string& unique, Connection* conn, - AnalyzerTag::Tag tag) + AnalyzerTag::Tag tag, bool update_conn) { if ( unique.empty() ) return 0; if ( IsIgnored(unique) ) return 0; @@ -211,7 +213,8 @@ File* Manager::GetFile(const string& unique, Connection* conn, else { rval->UpdateLastActivityTime(); - rval->UpdateConnectionFields(conn); + if ( update_conn ) + rval->UpdateConnectionFields(conn); } return rval; diff --git a/src/file_analysis/Manager.h b/src/file_analysis/Manager.h index f22c919736..c27444fb65 100644 --- a/src/file_analysis/Manager.h +++ b/src/file_analysis/Manager.h @@ -134,7 +134,8 @@ protected: * fields. */ File* GetFile(const string& unique, Connection* conn = 0, - AnalyzerTag::Tag tag = AnalyzerTag::Error); + AnalyzerTag::Tag tag = AnalyzerTag::Error, + bool update_conn = true); /** * @return the File object mapped to \a file_id, or a null pointer if no From cd0a8bfbdb83c292934d6971b5c6e66f45d82b9f Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Fri, 19 Apr 2013 16:27:32 -0500 Subject: [PATCH 09/10] FileAnalysis: inlined doc fixes. 
--- src/event.bif | 10 +++++----- src/file_analysis.bif | 12 +++++------- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/src/event.bif b/src/event.bif index dc5dda51bb..638a7ce141 100644 --- a/src/event.bif +++ b/src/event.bif @@ -7001,7 +7001,7 @@ event event_queue_flush_point%(%); event get_file_handle%(tag: count, c: connection, is_orig: bool%); ## Indicates that a analysis of a new file has begun. The analysis can be -## augmented at this time via :bro:see:`FileAnalysis::add_action`. +## augmented at this time via :bro:see:`FileAnalysis::add_analyzer`. ## ## f: The file. ## @@ -7024,8 +7024,8 @@ event file_over_new_connection%(f: fa_file, c: connection%); ## f: The file. ## ## .. bro:see:: file_new file_over_new_connection file_gap file_state_remove -## default_file_timeout_interval FileAnalysis::postpone_timeout -## FileAnalysis::set_timeout_interval +## default_file_timeout_interval FileAnalysis::postpone_timeout +## FileAnalysis::set_timeout_interval event file_timeout%(f: fa_file%); ## Indicates that a chunk of the file is missing. @@ -7054,8 +7054,8 @@ event file_state_remove%(f: fa_file%); ## ## hash: The result of the hashing. ## -## .. bro:see:: FileAnalysis::add_action FileAnalysis::ACTION_MD5 -## FileAnalysis::ACTION_SHA1 FileAnalysis::ACTION_SHA256 +## .. bro:see:: FileAnalysis::add_analyzer FileAnalysis::ANALYZER_MD5 +## FileAnalysis::ANALYZER_SHA1 FileAnalysis::ANALYZER_SHA256 event file_hash%(f: fa_file, kind: string, hash: string%); ## Deprecated. Will be removed. diff --git a/src/file_analysis.bif b/src/file_analysis.bif index 67e692aacf..a2ef2b3e9f 100644 --- a/src/file_analysis.bif +++ b/src/file_analysis.bif @@ -91,16 +91,14 @@ function FileAnalysis::__eof%(source: string%): any module GLOBAL; -## For use within a :bro:see:`get_file_handle` handler to return a unique -## identifier to associate with some buffered input to the file analysis -## framework. 
The buffered data will then immediately be allowed to pass -## pass through the file analysis framework and execute any policy hooks -## that are available. If an empty string is returned, that signifies that -## the buffered data will be discarded with no further action taken on it. +## For use within a :bro:see:`get_file_handle` handler to set a unique +## identifier to associate with the current input to the file analysis +## framework. Using an empty string for the handle signifies that the +## input will be ignored/discarded. ## ## handle: A string that uniquely identifies a file. ## -## .. bro:see:: get_file_handle FileAnalysis::policy +## .. bro:see:: get_file_handle function set_file_handle%(handle: string%): any %{ file_mgr->SetHandle(handle->CheckString()); From f07760ba00cc0ddf2563c52064dda3df513c2cf1 Mon Sep 17 00:00:00 2001 From: Jon Siwek Date: Tue, 23 Apr 2013 10:50:43 -0500 Subject: [PATCH 10/10] FileAnalysis: add is_orig field to fa_file & Info. --- scripts/base/frameworks/file-analysis/main.bro | 5 +++++ scripts/base/init-bare.bro | 4 ++++ src/file_analysis/File.cc | 8 ++++++-- src/file_analysis/File.h | 3 ++- src/file_analysis/Manager.cc | 12 ++++++------ src/file_analysis/Manager.h | 2 +- .../file_analysis.log | 10 +++++----- 7 files changed, 29 insertions(+), 15 deletions(-) diff --git a/scripts/base/frameworks/file-analysis/main.bro b/scripts/base/frameworks/file-analysis/main.bro index 649ab5d43c..142709dcc4 100644 --- a/scripts/base/frameworks/file-analysis/main.bro +++ b/scripts/base/frameworks/file-analysis/main.bro @@ -46,6 +46,10 @@ export { ## path which was read, or some other input source. source: string &log &optional; + ## If the source of this file is is a network connection, this field + ## may be set to indicate the directionality. + is_orig: bool &log &optional; + ## The time at which the last activity for the file was seen. 
last_active: time &log; @@ -236,6 +240,7 @@ function set_info(f: fa_file) f$info$id = f$id; if ( f?$parent_id ) f$info$parent_id = f$parent_id; if ( f?$source ) f$info$source = f$source; + if ( f?$is_orig ) f$info$is_orig = f$is_orig; f$info$last_active = f$last_active; f$info$seen_bytes = f$seen_bytes; if ( f?$total_bytes ) f$info$total_bytes = f$total_bytes; diff --git a/scripts/base/init-bare.bro b/scripts/base/init-bare.bro index 8aeeac478c..c4245d9052 100644 --- a/scripts/base/init-bare.bro +++ b/scripts/base/init-bare.bro @@ -341,6 +341,10 @@ type fa_file: record { ## path which was read, or some other input source. source: string &optional; + ## If the source of this file is is a network connection, this field + ## may be set to indicate the directionality. + is_orig: bool &optional; + ## The set of connections over which the file was transferred. conns: table[conn_id] of connection &optional; diff --git a/src/file_analysis/File.cc b/src/file_analysis/File.cc index ffd281119b..f70257a4af 100644 --- a/src/file_analysis/File.cc +++ b/src/file_analysis/File.cc @@ -37,6 +37,7 @@ static RecordVal* get_conn_id_val(const Connection* conn) int File::id_idx = -1; int File::parent_id_idx = -1; int File::source_idx = -1; +int File::is_orig_idx = -1; int File::conns_idx = -1; int File::last_active_idx = -1; int File::seen_bytes_idx = -1; @@ -59,6 +60,7 @@ void File::StaticInit() id_idx = Idx("id"); parent_id_idx = Idx("parent_id"); source_idx = Idx("source"); + is_orig_idx = Idx("is_orig"); conns_idx = Idx("conns"); last_active_idx = Idx("last_active"); seen_bytes_idx = Idx("seen_bytes"); @@ -75,7 +77,8 @@ void File::StaticInit() salt = BifConst::FileAnalysis::salt->CheckString(); } -File::File(const string& unique, Connection* conn, AnalyzerTag::Tag tag) +File::File(const string& unique, Connection* conn, AnalyzerTag::Tag tag, + bool is_orig) : id(""), unique(unique), val(0), postpone_timeout(false), first_chunk(true), missed_bof(false), need_reassembly(false), 
done(false), analyzers(this) @@ -98,8 +101,9 @@ File::File(const string& unique, Connection* conn, AnalyzerTag::Tag tag) if ( conn ) { - // add source and connection fields + // add source, connection, is_orig fields val->Assign(source_idx, new StringVal(::Analyzer::GetTagName(tag))); + val->Assign(is_orig_idx, new Val(is_orig, TYPE_BOOL)); UpdateConnectionFields(conn); } else diff --git a/src/file_analysis/File.h b/src/file_analysis/File.h index 8705bce60b..07d8d66825 100644 --- a/src/file_analysis/File.h +++ b/src/file_analysis/File.h @@ -133,7 +133,7 @@ protected: * Constructor; only file_analysis::Manager should be creating these. */ File(const string& unique, Connection* conn = 0, - AnalyzerTag::Tag tag = AnalyzerTag::Error); + AnalyzerTag::Tag tag = AnalyzerTag::Error, bool is_orig = false); /** * Updates the "conn_ids" and "conn_uids" fields in #val record with the @@ -214,6 +214,7 @@ protected: static int id_idx; static int parent_id_idx; static int source_idx; + static int is_orig_idx; static int conns_idx; static int last_active_idx; static int seen_bytes_idx; diff --git a/src/file_analysis/Manager.cc b/src/file_analysis/Manager.cc index c0de853710..e316fdc66a 100644 --- a/src/file_analysis/Manager.cc +++ b/src/file_analysis/Manager.cc @@ -40,7 +40,7 @@ void Manager::DataIn(const u_char* data, uint64 len, uint64 offset, if ( IsDisabled(tag) ) return; GetFileHandle(tag, conn, is_orig); - DataIn(data, len, offset, GetFile(current_handle, conn, tag)); + DataIn(data, len, offset, GetFile(current_handle, conn, tag, is_orig)); } void Manager::DataIn(const u_char* data, uint64 len, uint64 offset, @@ -67,7 +67,7 @@ void Manager::DataIn(const u_char* data, uint64 len, AnalyzerTag::Tag tag, GetFileHandle(tag, conn, is_orig); // Sequential data input shouldn't be going over multiple conns, so don't // do the check to update connection set. 
- DataIn(data, len, GetFile(current_handle, conn, tag, false)); + DataIn(data, len, GetFile(current_handle, conn, tag, is_orig, false)); } void Manager::DataIn(const u_char* data, uint64 len, const string& unique) @@ -110,7 +110,7 @@ void Manager::Gap(uint64 offset, uint64 len, AnalyzerTag::Tag tag, if ( IsDisabled(tag) ) return; GetFileHandle(tag, conn, is_orig); - Gap(offset, len, GetFile(current_handle, conn, tag)); + Gap(offset, len, GetFile(current_handle, conn, tag, is_orig)); } void Manager::Gap(uint64 offset, uint64 len, const string& unique) @@ -131,7 +131,7 @@ void Manager::SetSize(uint64 size, AnalyzerTag::Tag tag, Connection* conn, if ( IsDisabled(tag) ) return; GetFileHandle(tag, conn, is_orig); - SetSize(size, GetFile(current_handle, conn, tag)); + SetSize(size, GetFile(current_handle, conn, tag, is_orig)); } void Manager::SetSize(uint64 size, const string& unique) @@ -188,7 +188,7 @@ bool Manager::RemoveAnalyzer(const FileID& file_id, const RecordVal* args) const } File* Manager::GetFile(const string& unique, Connection* conn, - AnalyzerTag::Tag tag, bool update_conn) + AnalyzerTag::Tag tag, bool is_orig, bool update_conn) { if ( unique.empty() ) return 0; if ( IsIgnored(unique) ) return 0; @@ -197,7 +197,7 @@ File* Manager::GetFile(const string& unique, Connection* conn, if ( ! 
rval ) { - rval = str_map[unique] = new File(unique, conn, tag); + rval = str_map[unique] = new File(unique, conn, tag, is_orig); FileID id = rval->GetID(); if ( id_map[id] ) diff --git a/src/file_analysis/Manager.h b/src/file_analysis/Manager.h index c27444fb65..16aec474cd 100644 --- a/src/file_analysis/Manager.h +++ b/src/file_analysis/Manager.h @@ -135,7 +135,7 @@ protected: */ File* GetFile(const string& unique, Connection* conn = 0, AnalyzerTag::Tag tag = AnalyzerTag::Error, - bool update_conn = true); + bool is_orig = false, bool update_conn = true); /** * @return the File object mapped to \a file_id, or a null pointer if no diff --git a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.logging/file_analysis.log b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.logging/file_analysis.log index dba789ab13..86f132470b 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.file-analysis.logging/file_analysis.log +++ b/testing/btest/Baseline/scripts.base.frameworks.file-analysis.logging/file_analysis.log @@ -3,8 +3,8 @@ #empty_field (empty) #unset_field - #path file_analysis -#open 2013-04-12-14-46-48 -#fields id parent_id source last_active seen_bytes total_bytes missing_bytes overflow_bytes timeout_interval bof_buffer_size mime_type timedout conn_uids analyzers extracted_files md5 sha1 sha256 -#types string string string time count count count count interval count string bool table[string] table[enum] table[string] string string string -Cx92a0ym5R8 - HTTP 1362692527.009775 4705 4705 0 0 120.000000 1024 text/plain F UWkUyAuUGXf FileAnalysis::ANALYZER_SHA1,FileAnalysis::ANALYZER_EXTRACT,FileAnalysis::ANALYZER_DATA_EVENT,FileAnalysis::ANALYZER_MD5,FileAnalysis::ANALYZER_SHA256 Cx92a0ym5R8-file 397168fd09991a0e712254df7bc639ac 1dd7ac0398df6cbc0696445a91ec681facf4dc47 4e7c7ef0984119447e743e3ec77e1de52713e345cde03fe7df753a35849bed18 -#close 2013-04-12-14-46-48 +#open 2013-04-23-15-41-01 +#fields id parent_id source is_orig last_active 
seen_bytes total_bytes missing_bytes overflow_bytes timeout_interval bof_buffer_size mime_type timedout conn_uids analyzers extracted_files md5 sha1 sha256 +#types string string string bool time count count count count interval count string bool table[string] table[enum] table[string] string string string +Cx92a0ym5R8 - HTTP F 1362692527.009775 4705 4705 0 0 120.000000 1024 text/plain F UWkUyAuUGXf FileAnalysis::ANALYZER_SHA1,FileAnalysis::ANALYZER_EXTRACT,FileAnalysis::ANALYZER_DATA_EVENT,FileAnalysis::ANALYZER_MD5,FileAnalysis::ANALYZER_SHA256 Cx92a0ym5R8-file 397168fd09991a0e712254df7bc639ac 1dd7ac0398df6cbc0696445a91ec681facf4dc47 4e7c7ef0984119447e743e3ec77e1de52713e345cde03fe7df753a35849bed18 +#close 2013-04-23-15-41-01