mirror of
https://github.com/zeek/zeek.git
synced 2025-10-02 06:38:20 +00:00

This changes many weird names to move non-static content from the weird name into the "addl" field to help ensure the total number of weird names is reasonably bounded. Note that the net_weird and flow_weird events do not have an "addl" parameter, so that information may no longer be available in those cases -- to make it available again we'd need to either (1) define new events that contain such a parameter, or (2) change the net_weird/flow_weird event signatures (which is a breaking change for user code at the moment). Also, the generic handling of binpac exceptions for analyzers which do not otherwise catch and handle them has been changed from a Weird to a ProtocolViolation. Finally, a new "file_weird" event has been added for reporting weirdness found during file analysis.
361 lines
11 KiB
C++
361 lines
11 KiB
C++
// See the file "COPYING" in the main distribution directory for copyright.
|
|
|
|
#ifndef FILE_ANALYSIS_FILE_H
|
|
#define FILE_ANALYSIS_FILE_H
|
|
|
|
#include <list>
#include <string>
#include <utility>
#include <vector>

#include "FileReassembler.h"
#include "Conn.h"
#include "Val.h"
#include "Tag.h"
#include "AnalyzerSet.h"
#include "BroString.h"
#include "WeirdState.h"
|
|
|
|
namespace file_analysis {
|
|
|
|
class FileReassembler;
|
|
|
|
/**
|
|
* Wrapper class around \c fa_file record values from script layer.
|
|
*/
|
|
class File {
|
|
public:
|
|
|
|
/**
|
|
* Destructor. Nothing fancy, releases a reference to the wrapped
|
|
* \c fa_file value.
|
|
*/
|
|
~File();
|
|
|
|
/**
|
|
* @return the wrapped \c fa_file record value, #val.
|
|
*/
|
|
RecordVal* GetVal() const { return val; }
|
|
|
|
/**
|
|
* @return the value of the "source" field from #val record or an empty
|
|
* string if it's not initialized.
|
|
*/
|
|
string GetSource() const;
|
|
|
|
/**
|
|
* Set the "source" field from #val record to \a source.
|
|
* @param source the new value of the "source" field.
|
|
*/
|
|
void SetSource(const string& source);
|
|
|
|
/**
|
|
* @return value (seconds) of the "timeout_interval" field from #val record.
|
|
*/
|
|
double GetTimeoutInterval() const;
|
|
|
|
/**
|
|
* Set the "timeout_interval" field from #val record to \a interval seconds.
|
|
* @param interval the new value of the "timeout_interval" field.
|
|
*/
|
|
void SetTimeoutInterval(double interval);
|
|
|
|
/**
|
|
* Change the maximum size that an attached extraction analyzer is allowed.
|
|
* @param args the file extraction analyzer whose limit needs changed.
|
|
* @param bytes new limit.
|
|
* @return false if no extraction analyzer is active, else true.
|
|
*/
|
|
bool SetExtractionLimit(RecordVal* args, uint64 bytes);
|
|
|
|
/**
|
|
* @return value of the "id" field from #val record.
|
|
*/
|
|
string GetID() const { return id; }
|
|
|
|
/**
|
|
* @return value of "last_active" field in #val record;
|
|
*/
|
|
double GetLastActivityTime() const;
|
|
|
|
/**
|
|
* Refreshes "last_active" field of #val record with current network time.
|
|
*/
|
|
void UpdateLastActivityTime();
|
|
|
|
/**
|
|
* Set "total_bytes" field of #val record to \a size.
|
|
* @param size the new value of the "total_bytes" field.
|
|
*/
|
|
void SetTotalBytes(uint64 size);
|
|
|
|
/**
|
|
* @return true if file analysis is complete for the file, else false.
|
|
* It is incomplete if the total size is unknown or if the number of bytes
|
|
* streamed to analyzers (either as data delivers or gap information)
|
|
* matches the known total size.
|
|
*/
|
|
bool IsComplete() const;
|
|
|
|
/**
|
|
* Create a timer to be dispatched after the amount of time indicated by
|
|
* the "timeout_interval" field of the #val record in order to check if
|
|
* "last_active" field is old enough to timeout analysis of the file.
|
|
*/
|
|
void ScheduleInactivityTimer() const;
|
|
|
|
/**
|
|
* Queues attaching an analyzer. Only one analyzer per type can be attached
|
|
* at a time unless the arguments differ.
|
|
* @param tag the analyzer tag of the file analyzer to add.
|
|
* @param args an \c AnalyzerArgs value representing a file analyzer.
|
|
* @return false if analyzer can't be instantiated, else true.
|
|
*/
|
|
bool AddAnalyzer(file_analysis::Tag tag, RecordVal* args);
|
|
|
|
/**
|
|
* Queues removal of an analyzer.
|
|
* @param tag the analyzer tag of the file analyzer to remove.
|
|
* @param args an \c AnalyzerArgs value representing a file analyzer.
|
|
* @return true if analyzer was active at time of call, else false.
|
|
*/
|
|
bool RemoveAnalyzer(file_analysis::Tag tag, RecordVal* args);
|
|
|
|
/**
|
|
* Signal that this analyzer can be deleted once it's safe to do so.
|
|
*/
|
|
void DoneWithAnalyzer(Analyzer* analyzer);
|
|
|
|
/**
|
|
* Pass in non-sequential data and deliver to attached analyzers.
|
|
* @param data pointer to start of a chunk of file data.
|
|
* @param len number of bytes in the data chunk.
|
|
* @param offset number of bytes from start of file at which chunk occurs.
|
|
*/
|
|
void DataIn(const u_char* data, uint64 len, uint64 offset);
|
|
|
|
/**
|
|
* Pass in sequential data and deliver to attached analyzers.
|
|
* @param data pointer to start of a chunk of file data.
|
|
* @param len number of bytes in the data chunk.
|
|
*/
|
|
void DataIn(const u_char* data, uint64 len);
|
|
|
|
/**
|
|
* Inform attached analyzers about end of file being seen.
|
|
*/
|
|
void EndOfFile();
|
|
|
|
/**
|
|
* Inform attached analyzers about a gap in file stream.
|
|
* @param offset number of bytes in to file at which missing chunk starts.
|
|
* @param len length in bytes of the missing chunk of file data.
|
|
*/
|
|
void Gap(uint64 offset, uint64 len);
|
|
|
|
/**
|
|
* @param h pointer to an event handler.
|
|
* @return true if event has a handler and the file isn't ignored.
|
|
*/
|
|
bool FileEventAvailable(EventHandlerPtr h);
|
|
|
|
/**
|
|
* Raises an event related to the file's life-cycle, the only parameter
|
|
* to that event is the \c fa_file record..
|
|
* @param h pointer to an event handler.
|
|
*/
|
|
void FileEvent(EventHandlerPtr h);
|
|
|
|
/**
|
|
* Raises an event related to the file's life-cycle.
|
|
* @param h pointer to an event handler.
|
|
* @param vl list of argument values to pass to event call.
|
|
*/
|
|
void FileEvent(EventHandlerPtr h, val_list* vl);
|
|
|
|
|
|
/**
|
|
* Sets the MIME type for a file to a specific value.
|
|
*
|
|
* Setting the MIME type has to be done before the MIME type is
|
|
* inferred from the content, and before any data is passed to the
|
|
* analyzer (the beginning of file buffer has to be empty). After
|
|
* data has been sent or a MIME type has been set once, it cannot be
|
|
* changed.
|
|
*
|
|
* This function should only be called when it does not make sense
|
|
* to perform automated MIME type detections. This is e.g. the case
|
|
* in protocols where the file type is fixed in the protocol description.
|
|
* This is for example the case for TLS and X.509 certificates.
|
|
*
|
|
* @param mime_type mime type to set
|
|
* @return true if the mime type was set. False if it could not be set because
|
|
* a mime type was already set or inferred.
|
|
*/
|
|
bool SetMime(const string& mime_type);
|
|
|
|
/**
|
|
* Whether to permit a weird to carry on through the full reporter/weird
|
|
* framework.
|
|
*/
|
|
bool PermitWeird(const char* name, uint64 threshold, uint64 rate,
|
|
double duration);
|
|
|
|
protected:
|
|
friend class Manager;
|
|
friend class FileReassembler;
|
|
|
|
/**
|
|
* Constructor; only file_analysis::Manager should be creating these.
|
|
* @param file_id an identifier string for the file in pretty hash form
|
|
* (similar to connection uids).
|
|
* @param source_name the value for the source field to fill in.
|
|
* @param conn a network connection over which the file is transferred.
|
|
* @param tag the network protocol over which the file is transferred.
|
|
* @param is_orig true if the file is being transferred from the originator
|
|
* of the connection to the responder. False indicates the other
|
|
* direction.
|
|
*/
|
|
File(const string& file_id, const string& source_name, Connection* conn = 0,
|
|
analyzer::Tag tag = analyzer::Tag::Error, bool is_orig = false);
|
|
|
|
/**
|
|
* Updates the "conn_ids" and "conn_uids" fields in #val record with the
|
|
* \c conn_id and UID taken from \a conn.
|
|
* @param conn the connection over which a part of the file has been seen.
|
|
* @param is_orig true if the connection originator is sending the file.
|
|
* @return true if the connection was previously unknown.
|
|
*/
|
|
bool UpdateConnectionFields(Connection* conn, bool is_orig);
|
|
|
|
/**
|
|
* Raise the file_over_new_connection event with given arguments.
|
|
*/
|
|
void RaiseFileOverNewConnection(Connection* conn, bool is_orig);
|
|
|
|
/**
|
|
* Increment a byte count field of #val record by \a size.
|
|
* @param size number of bytes by which to increment.
|
|
* @param field_idx the index of the field in \c fa_file to increment.
|
|
*/
|
|
void IncrementByteCount(uint64 size, int field_idx);
|
|
|
|
/**
|
|
* Wrapper to RecordVal::LookupWithDefault for the field in #val at index
|
|
* \a idx which automatically unrefs the Val and returns a converted value.
|
|
* @param idx the index of a field of type "count" in \c fa_file.
|
|
* @return the value of the field, which may be it &default.
|
|
*/
|
|
uint64 LookupFieldDefaultCount(int idx) const;
|
|
|
|
/**
|
|
* Wrapper to RecordVal::LookupWithDefault for the field in #val at index
|
|
* \a idx which automatically unrefs the Val and returns a converted value.
|
|
* @param idx the index of a field of type "interval" in \c fa_file.
|
|
* @return the value of the field, which may be it &default.
|
|
*/
|
|
double LookupFieldDefaultInterval(int idx) const;
|
|
|
|
/**
|
|
* Buffers incoming data at the beginning of a file.
|
|
* @param data pointer to a data chunk to buffer.
|
|
* @param len number of bytes in the data chunk.
|
|
* @return true if buffering is still required, else false
|
|
*/
|
|
bool BufferBOF(const u_char* data, uint64 len);
|
|
|
|
/**
|
|
* Does metadata inference (e.g. mime type detection via file
|
|
* magic signatures) using data in the BOF (beginning-of-file) buffer
|
|
* and raises an event with the metadata.
|
|
*/
|
|
void InferMetadata();
|
|
|
|
/**
|
|
* Enables reassembly on the file.
|
|
*/
|
|
void EnableReassembly();
|
|
|
|
/**
|
|
* Disables reassembly on the file. If there is an existing reassembler
|
|
* for the file, this will cause it to be deleted and won't allow a new
|
|
* one to be created until reassembly is reenabled.
|
|
*/
|
|
void DisableReassembly();
|
|
|
|
/**
|
|
* Set a maximum allowed bytes of memory for file reassembly for this file.
|
|
*/
|
|
void SetReassemblyBuffer(uint64 max);
|
|
|
|
/**
|
|
* Perform stream-wise delivery for analyzers that need it.
|
|
*/
|
|
void DeliverStream(const u_char* data, uint64 len);
|
|
|
|
/**
|
|
* Perform chunk-wise delivery for analyzers that need it.
|
|
*/
|
|
void DeliverChunk(const u_char* data, uint64 len, uint64 offset);
|
|
|
|
/**
|
|
* Lookup a record field index/offset by name.
|
|
* @param field_name the name of the record field.
|
|
* @param type the record type for which the field will be looked up.
|
|
* @return the field offset in #val record corresponding to \a field_name.
|
|
*/
|
|
static int Idx(const string& field_name, const RecordType* type);
|
|
|
|
/**
|
|
* Initializes static member.
|
|
*/
|
|
static void StaticInit();
|
|
|
|
protected:
|
|
string id; /**< A pretty hash that likely identifies file */
|
|
RecordVal* val; /**< \c fa_file from script layer. */
|
|
FileReassembler* file_reassembler; /**< A reassembler for the file if it's needed. */
|
|
uint64 stream_offset; /**< The offset of the file which has been forwarded. */
|
|
uint64 reassembly_max_buffer; /**< Maximum allowed buffer for reassembly. */
|
|
bool did_metadata_inference; /**< Whether the metadata inference has already been attempted. */
|
|
bool reassembly_enabled; /**< Whether file stream reassembly is needed. */
|
|
bool postpone_timeout; /**< Whether postponing timeout is requested. */
|
|
bool done; /**< If this object is about to be deleted. */
|
|
AnalyzerSet analyzers; /**< A set of attached file analyzers. */
|
|
std::list<Analyzer *> done_analyzers; /**< Analyzers we're done with, remembered here until they can be safely deleted. */
|
|
|
|
struct BOF_Buffer {
|
|
BOF_Buffer() : full(false), size(0) {}
|
|
~BOF_Buffer()
|
|
{ for ( size_t i = 0; i < chunks.size(); ++i ) delete chunks[i]; }
|
|
|
|
bool full;
|
|
uint64 size;
|
|
BroString::CVec chunks;
|
|
} bof_buffer; /**< Beginning of file buffer. */
|
|
|
|
WeirdStateMap weird_state;
|
|
|
|
static int id_idx;
|
|
static int parent_id_idx;
|
|
static int source_idx;
|
|
static int is_orig_idx;
|
|
static int conns_idx;
|
|
static int last_active_idx;
|
|
static int seen_bytes_idx;
|
|
static int total_bytes_idx;
|
|
static int missing_bytes_idx;
|
|
static int overflow_bytes_idx;
|
|
static int timeout_interval_idx;
|
|
static int bof_buffer_size_idx;
|
|
static int bof_buffer_idx;
|
|
static int mime_type_idx;
|
|
static int mime_types_idx;
|
|
static int meta_inferred_idx;
|
|
|
|
static int meta_mime_type_idx;
|
|
static int meta_mime_types_idx;
|
|
};
|
|
|
|
} // namespace file_analysis
|
|
|
|
#endif
|