zeek/src/file_analysis/File.h
Jon Siwek 995368e68c Remove variable content from weird names
This changes many weird names to move non-static content from the
weird name into the "addl" field to help ensure the total number of
weird names is reasonably bounded.  Note the net_weird and flow_weird
events do not have an "addl" parameter, so information may no longer
be available in those cases -- to make it available again we'd need
to either (1) define new events that contain such a parameter, or
(2) change net_weird/flow_weird event signature (which is a breaking
change for user-code at the moment).

Also, the generic handling of binpac exceptions for analyzers which
to not otherwise catch and handle them has been changed from a Weird
to a ProtocolViolation.

Finally, a new "file_weird" event has been added for reporting
weirdness found during file analysis.
2019-04-01 18:30:11 -07:00

361 lines
11 KiB
C++

// See the file "COPYING" in the main distribution directory for copyright.
#ifndef FILE_ANALYSIS_FILE_H
#define FILE_ANALYSIS_FILE_H
#include <string>
#include <utility>
#include <vector>
#include "FileReassembler.h"
#include "Conn.h"
#include "Val.h"
#include "Tag.h"
#include "AnalyzerSet.h"
#include "BroString.h"
#include "WeirdState.h"
namespace file_analysis {
class FileReassembler;
/**
* Wrapper class around \c fa_file record values from script layer.
*/
class File {
public:
/**
* Destructor. Nothing fancy, releases a reference to the wrapped
* \c fa_file value.
*/
~File();
/**
* @return the wrapped \c fa_file record value, #val.
*/
RecordVal* GetVal() const { return val; }
/**
* @return the value of the "source" field from #val record or an empty
* string if it's not initialized.
*/
string GetSource() const;
/**
* Set the "source" field from #val record to \a source.
* @param source the new value of the "source" field.
*/
void SetSource(const string& source);
/**
* @return value (seconds) of the "timeout_interval" field from #val record.
*/
double GetTimeoutInterval() const;
/**
* Set the "timeout_interval" field from #val record to \a interval seconds.
* @param interval the new value of the "timeout_interval" field.
*/
void SetTimeoutInterval(double interval);
/**
* Change the maximum size that an attached extraction analyzer is allowed.
* @param args the file extraction analyzer whose limit needs changed.
* @param bytes new limit.
* @return false if no extraction analyzer is active, else true.
*/
bool SetExtractionLimit(RecordVal* args, uint64 bytes);
/**
* @return value of the "id" field from #val record.
*/
string GetID() const { return id; }
/**
* @return value of "last_active" field in #val record;
*/
double GetLastActivityTime() const;
/**
* Refreshes "last_active" field of #val record with current network time.
*/
void UpdateLastActivityTime();
/**
* Set "total_bytes" field of #val record to \a size.
* @param size the new value of the "total_bytes" field.
*/
void SetTotalBytes(uint64 size);
/**
* @return true if file analysis is complete for the file, else false.
* It is incomplete if the total size is unknown or if the number of bytes
* streamed to analyzers (either as data delivers or gap information)
* matches the known total size.
*/
bool IsComplete() const;
/**
* Create a timer to be dispatched after the amount of time indicated by
* the "timeout_interval" field of the #val record in order to check if
* "last_active" field is old enough to timeout analysis of the file.
*/
void ScheduleInactivityTimer() const;
/**
* Queues attaching an analyzer. Only one analyzer per type can be attached
* at a time unless the arguments differ.
* @param tag the analyzer tag of the file analyzer to add.
* @param args an \c AnalyzerArgs value representing a file analyzer.
* @return false if analyzer can't be instantiated, else true.
*/
bool AddAnalyzer(file_analysis::Tag tag, RecordVal* args);
/**
* Queues removal of an analyzer.
* @param tag the analyzer tag of the file analyzer to remove.
* @param args an \c AnalyzerArgs value representing a file analyzer.
* @return true if analyzer was active at time of call, else false.
*/
bool RemoveAnalyzer(file_analysis::Tag tag, RecordVal* args);
/**
* Signal that this analyzer can be deleted once it's safe to do so.
*/
void DoneWithAnalyzer(Analyzer* analyzer);
/**
* Pass in non-sequential data and deliver to attached analyzers.
* @param data pointer to start of a chunk of file data.
* @param len number of bytes in the data chunk.
* @param offset number of bytes from start of file at which chunk occurs.
*/
void DataIn(const u_char* data, uint64 len, uint64 offset);
/**
* Pass in sequential data and deliver to attached analyzers.
* @param data pointer to start of a chunk of file data.
* @param len number of bytes in the data chunk.
*/
void DataIn(const u_char* data, uint64 len);
/**
* Inform attached analyzers about end of file being seen.
*/
void EndOfFile();
/**
* Inform attached analyzers about a gap in file stream.
* @param offset number of bytes in to file at which missing chunk starts.
* @param len length in bytes of the missing chunk of file data.
*/
void Gap(uint64 offset, uint64 len);
/**
* @param h pointer to an event handler.
* @return true if event has a handler and the file isn't ignored.
*/
bool FileEventAvailable(EventHandlerPtr h);
/**
* Raises an event related to the file's life-cycle, the only parameter
* to that event is the \c fa_file record..
* @param h pointer to an event handler.
*/
void FileEvent(EventHandlerPtr h);
/**
* Raises an event related to the file's life-cycle.
* @param h pointer to an event handler.
* @param vl list of argument values to pass to event call.
*/
void FileEvent(EventHandlerPtr h, val_list* vl);
/**
* Sets the MIME type for a file to a specific value.
*
* Setting the MIME type has to be done before the MIME type is
* inferred from the content, and before any data is passed to the
* analyzer (the beginning of file buffer has to be empty). After
* data has been sent or a MIME type has been set once, it cannot be
* changed.
*
* This function should only be called when it does not make sense
* to perform automated MIME type detections. This is e.g. the case
* in protocols where the file type is fixed in the protocol description.
* This is for example the case for TLS and X.509 certificates.
*
* @param mime_type mime type to set
* @return true if the mime type was set. False if it could not be set because
* a mime type was already set or inferred.
*/
bool SetMime(const string& mime_type);
/**
* Whether to permit a weird to carry on through the full reporter/weird
* framework.
*/
bool PermitWeird(const char* name, uint64 threshold, uint64 rate,
double duration);
protected:
friend class Manager;
friend class FileReassembler;
/**
* Constructor; only file_analysis::Manager should be creating these.
* @param file_id an identifier string for the file in pretty hash form
* (similar to connection uids).
* @param source_name the value for the source field to fill in.
* @param conn a network connection over which the file is transferred.
* @param tag the network protocol over which the file is transferred.
* @param is_orig true if the file is being transferred from the originator
* of the connection to the responder. False indicates the other
* direction.
*/
File(const string& file_id, const string& source_name, Connection* conn = 0,
analyzer::Tag tag = analyzer::Tag::Error, bool is_orig = false);
/**
* Updates the "conn_ids" and "conn_uids" fields in #val record with the
* \c conn_id and UID taken from \a conn.
* @param conn the connection over which a part of the file has been seen.
* @param is_orig true if the connection originator is sending the file.
* @return true if the connection was previously unknown.
*/
bool UpdateConnectionFields(Connection* conn, bool is_orig);
/**
* Raise the file_over_new_connection event with given arguments.
*/
void RaiseFileOverNewConnection(Connection* conn, bool is_orig);
/**
* Increment a byte count field of #val record by \a size.
* @param size number of bytes by which to increment.
* @param field_idx the index of the field in \c fa_file to increment.
*/
void IncrementByteCount(uint64 size, int field_idx);
/**
* Wrapper to RecordVal::LookupWithDefault for the field in #val at index
* \a idx which automatically unrefs the Val and returns a converted value.
* @param idx the index of a field of type "count" in \c fa_file.
* @return the value of the field, which may be it &default.
*/
uint64 LookupFieldDefaultCount(int idx) const;
/**
* Wrapper to RecordVal::LookupWithDefault for the field in #val at index
* \a idx which automatically unrefs the Val and returns a converted value.
* @param idx the index of a field of type "interval" in \c fa_file.
* @return the value of the field, which may be it &default.
*/
double LookupFieldDefaultInterval(int idx) const;
/**
* Buffers incoming data at the beginning of a file.
* @param data pointer to a data chunk to buffer.
* @param len number of bytes in the data chunk.
* @return true if buffering is still required, else false
*/
bool BufferBOF(const u_char* data, uint64 len);
/**
* Does metadata inference (e.g. mime type detection via file
* magic signatures) using data in the BOF (beginning-of-file) buffer
* and raises an event with the metadata.
*/
void InferMetadata();
/**
* Enables reassembly on the file.
*/
void EnableReassembly();
/**
* Disables reassembly on the file. If there is an existing reassembler
* for the file, this will cause it to be deleted and won't allow a new
* one to be created until reassembly is reenabled.
*/
void DisableReassembly();
/**
* Set a maximum allowed bytes of memory for file reassembly for this file.
*/
void SetReassemblyBuffer(uint64 max);
/**
* Perform stream-wise delivery for analyzers that need it.
*/
void DeliverStream(const u_char* data, uint64 len);
/**
* Perform chunk-wise delivery for analyzers that need it.
*/
void DeliverChunk(const u_char* data, uint64 len, uint64 offset);
/**
* Lookup a record field index/offset by name.
* @param field_name the name of the record field.
* @param type the record type for which the field will be looked up.
* @return the field offset in #val record corresponding to \a field_name.
*/
static int Idx(const string& field_name, const RecordType* type);
/**
* Initializes static member.
*/
static void StaticInit();
protected:
string id; /**< A pretty hash that likely identifies file */
RecordVal* val; /**< \c fa_file from script layer. */
FileReassembler* file_reassembler; /**< A reassembler for the file if it's needed. */
uint64 stream_offset; /**< The offset of the file which has been forwarded. */
uint64 reassembly_max_buffer; /**< Maximum allowed buffer for reassembly. */
bool did_metadata_inference; /**< Whether the metadata inference has already been attempted. */
bool reassembly_enabled; /**< Whether file stream reassembly is needed. */
bool postpone_timeout; /**< Whether postponing timeout is requested. */
bool done; /**< If this object is about to be deleted. */
AnalyzerSet analyzers; /**< A set of attached file analyzers. */
std::list<Analyzer *> done_analyzers; /**< Analyzers we're done with, remembered here until they can be safely deleted. */
struct BOF_Buffer {
BOF_Buffer() : full(false), size(0) {}
~BOF_Buffer()
{ for ( size_t i = 0; i < chunks.size(); ++i ) delete chunks[i]; }
bool full;
uint64 size;
BroString::CVec chunks;
} bof_buffer; /**< Beginning of file buffer. */
WeirdStateMap weird_state;
static int id_idx;
static int parent_id_idx;
static int source_idx;
static int is_orig_idx;
static int conns_idx;
static int last_active_idx;
static int seen_bytes_idx;
static int total_bytes_idx;
static int missing_bytes_idx;
static int overflow_bytes_idx;
static int timeout_interval_idx;
static int bof_buffer_size_idx;
static int bof_buffer_idx;
static int mime_type_idx;
static int mime_types_idx;
static int meta_inferred_idx;
static int meta_mime_type_idx;
static int meta_mime_types_idx;
};
} // namespace file_analysis
#endif