Merge remote-tracking branch 'origin/topic/jsiwek/bit-1368' into topic/seth/more-file-type-ident-fixes

Conflicts:
	src/file_analysis/File.cc
	testing/btest/Baseline/plugins.hooks/output
This commit is contained in:
Seth Hall 2015-04-20 09:36:40 -04:00
commit 038e4c24f6
35 changed files with 269 additions and 192 deletions

@ -1 +1 @@
Subproject commit f2e34d731ed29bb993fbb065846faa342a8c824f
Subproject commit d4c305df93d555ab468dbad4a5b69412bf44a833

View file

@ -10,6 +10,7 @@ RecordType* endpoint;
RecordType* endpoint_stats;
RecordType* connection_type;
RecordType* fa_file_type;
RecordType* inferred_file_metadata_type;
RecordType* icmp_conn;
RecordType* icmp_context;
RecordType* SYN_packet;
@ -316,6 +317,7 @@ void init_net_var()
endpoint_stats = internal_type("endpoint_stats")->AsRecordType();
connection_type = internal_type("connection")->AsRecordType();
fa_file_type = internal_type("fa_file")->AsRecordType();
inferred_file_metadata_type = internal_type("inferred_file_metadata")->AsRecordType();
icmp_conn = internal_type("icmp_conn")->AsRecordType();
icmp_context = internal_type("icmp_context")->AsRecordType();
signature_state = internal_type("signature_state")->AsRecordType();

View file

@ -13,6 +13,7 @@ extern RecordType* endpoint;
extern RecordType* endpoint_stats;
extern RecordType* connection_type;
extern RecordType* fa_file_type;
extern RecordType* inferred_file_metadata_type;
extern RecordType* icmp_conn;
extern RecordType* icmp_context;
extern RecordType* signature_state;

View file

@ -20,9 +20,6 @@ int case_insensitive = 0;
extern int RE_parse(void);
extern void RE_set_input(const char* str);
// If true, the set-wise matching always returns false - for benchmarking.
extern int rule_bench;
Specific_RE_Matcher::Specific_RE_Matcher(match_type arg_mt, int arg_multiline)
: equiv_class(NUM_SYM)
{
@ -279,9 +276,6 @@ inline void RE_Match_State::AddMatches(const AcceptingSet& as,
bool RE_Match_State::Match(const u_char* bv, int n,
bool bol, bool eol, bool clear)
{
if ( rule_bench > 0 )
return false;
if ( current_pos == -1 )
{
// First call to Match().

View file

@ -577,9 +577,6 @@ RuleFileMagicState* RuleMatcher::InitFileMagic() const
{
RuleFileMagicState* state = new RuleFileMagicState();
if ( rule_bench == 3 )
return state;
loop_over_list(root->psets[Rule::FILE_MAGIC], i)
{
RuleHdrTest::PatternSet* set = root->psets[Rule::FILE_MAGIC][i];
@ -630,9 +627,6 @@ RuleMatcher::MIME_Matches* RuleMatcher::Match(RuleFileMagicState* state,
return rval;
}
if ( rule_bench >= 2 )
return rval;
#ifdef DEBUG
if ( debug_logger.IsEnabled(DBG_RULES) )
{
@ -712,9 +706,6 @@ RuleEndpointState* RuleMatcher::InitEndpoint(analyzer::Analyzer* analyzer,
RuleEndpointState* state =
new RuleEndpointState(analyzer, from_orig, opposite, pia);
if ( rule_bench == 3 )
return state;
rule_hdr_test_list tests;
tests.append(root);
@ -837,9 +828,6 @@ void RuleMatcher::Match(RuleEndpointState* state, Rule::PatternType type,
// for 'accepted' (that depends on the average number of matching
// patterns).
if ( rule_bench >= 2 )
return;
bool newmatch = false;
#ifdef DEBUG
@ -956,9 +944,6 @@ void RuleMatcher::Match(RuleEndpointState* state, Rule::PatternType type,
void RuleMatcher::FinishEndpoint(RuleEndpointState* state)
{
if ( rule_bench == 3 )
return;
// Send EOL to payload matchers.
Match(state, Rule::PAYLOAD, (const u_char *) "", 0, false, true, false);
@ -1110,15 +1095,9 @@ void RuleMatcher::ExecRule(Rule* rule, RuleEndpointState* state, bool eos)
void RuleMatcher::ClearEndpointState(RuleEndpointState* state)
{
if ( rule_bench == 3 )
return;
state->payload_size = -1;
ExecPureRules(state, 1);
state->payload_size = -1;
state->matched_by_patterns.clear();
loop_over_list(state->matched_text, i)
delete state->matched_text[i];
state->matched_text.clear();
loop_over_list(state->matchers, j)
state->matchers[j]->state->Clear();
@ -1126,9 +1105,6 @@ void RuleMatcher::ClearEndpointState(RuleEndpointState* state)
void RuleMatcher::ClearFileMagicState(RuleFileMagicState* state) const
{
if ( rule_bench == 3 )
return;
loop_over_list(state->matchers, j)
state->matchers[j]->state->Clear();
}
@ -1496,8 +1472,12 @@ void RuleMatcherState::ClearMatchState(bool orig)
if ( ! rule_matcher )
return;
if ( orig_match_state )
rule_matcher->ClearEndpointState(orig_match_state);
if ( resp_match_state )
if ( orig )
{
if ( orig_match_state )
rule_matcher->ClearEndpointState(orig_match_state);
}
else if ( resp_match_state )
rule_matcher->ClearEndpointState(resp_match_state);
}

View file

@ -22,8 +22,6 @@
//#define MATCHER_PRINT_STATS
extern int rule_bench;
// Parser interface:
extern void rules_error(const char* msg);

View file

@ -81,7 +81,7 @@ void PIA::PIA_Done()
}
void PIA::PIA_DeliverPacket(int len, const u_char* data, bool is_orig, uint64 seq,
const IP_Hdr* ip, int caplen)
const IP_Hdr* ip, int caplen, bool clear_state)
{
if ( pkt_buffer.state == SKIPPING )
return;
@ -108,6 +108,9 @@ void PIA::PIA_DeliverPacket(int len, const u_char* data, bool is_orig, uint64 se
// FIXME: I'm not sure why it does not work with eol=true...
DoMatch(data, len, is_orig, true, false, false, ip);
if ( clear_state )
RuleMatcherState::ClearMatchState(is_orig);
pkt_buffer.state = new_state;
current_packet.data = 0;

View file

@ -42,7 +42,7 @@ public:
protected:
void PIA_Done();
void PIA_DeliverPacket(int len, const u_char* data, bool is_orig,
uint64 seq, const IP_Hdr* ip, int caplen);
uint64 seq, const IP_Hdr* ip, int caplen, bool clear_state);
enum State { INIT, BUFFERING, MATCHING_ONLY, SKIPPING } state;
@ -109,7 +109,7 @@ protected:
uint64 seq, const IP_Hdr* ip, int caplen)
{
Analyzer::DeliverPacket(len, data, is_orig, seq, ip, caplen);
PIA_DeliverPacket(len, data, is_orig, seq, ip, caplen);
PIA_DeliverPacket(len, data, is_orig, seq, ip, caplen, true);
}
virtual void ActivateAnalyzer(analyzer::Tag tag, const Rule* rule);
@ -154,7 +154,7 @@ protected:
uint64 seq, const IP_Hdr* ip, int caplen)
{
Analyzer::DeliverPacket(len, data, is_orig, seq, ip, caplen);
PIA_DeliverPacket(len, data, is_orig, seq, ip, caplen);
PIA_DeliverPacket(len, data, is_orig, seq, ip, caplen, false);
}
virtual void DeliverStream(int len, const u_char* data, bool is_orig);

View file

@ -905,8 +905,8 @@ event get_file_handle%(tag: Analyzer::Tag, c: connection, is_orig: bool%);
##
## f: The file.
##
## .. bro:see:: file_over_new_connection file_timeout file_gap file_mime_type
## file_state_remove
## .. bro:see:: file_over_new_connection file_timeout file_gap
## file_metadata_inferred file_state_remove
event file_new%(f: fa_file%);
## Indicates that a file has been seen being transferred over a connection
@ -918,39 +918,30 @@ event file_new%(f: fa_file%);
##
## is_orig: true if the originator of *c* is the one sending the file.
##
## .. bro:see:: file_new file_timeout file_gap file_mime_type
## .. bro:see:: file_new file_timeout file_gap file_metadata_inferred
## file_state_remove
event file_over_new_connection%(f: fa_file, c: connection, is_orig: bool%);
## Provide the most likely matching MIME type for this file. The analysis
## can be augmented at this time via :bro:see:`Files::add_analyzer`.
## Provide all metadata that has been inferred about a particular file
## from inspection of the initial content that been seen at the beginning
## of the file. The analysis can be augmented at this time via
## :bro:see:`Files::add_analyzer`.
##
## f: The file.
##
## mime_type: The mime type that was discovered.
## meta: Metadata that's been discovered about the file.
##
## .. bro:see:: file_over_new_connection file_timeout file_gap file_mime_type
## file_mime_types file_state_remove
event file_mime_type%(f: fa_file, mime_type: string%);
## Provide all matching MIME types for this file. The analysis can be
## augmented at this time via :bro:see:`Files::add_analyzer`.
##
## f: The file.
##
## mime_types: The mime types that were discovered.
##
## .. bro:see:: file_over_new_connection file_timeout file_gap file_mime_type
## file_mime_types file_state_remove
event file_mime_types%(f: fa_file, mime_types: mime_matches%);
## .. bro:see:: file_over_new_connection file_timeout file_gap
## file_state_remove
event file_metadata_inferred%(f: fa_file, meta: inferred_file_metadata%);
## Indicates that file analysis has timed out because no activity was seen
## for the file in a while.
##
## f: The file.
##
## .. bro:see:: file_new file_over_new_connection file_gap file_mime_type
## file_mime_types file_state_remove default_file_timeout_interval
## .. bro:see:: file_new file_over_new_connection file_gap
## file_metadata_inferred file_state_remove default_file_timeout_interval
## Files::set_timeout_interval
event file_timeout%(f: fa_file%);
@ -962,8 +953,8 @@ event file_timeout%(f: fa_file%);
##
## len: The number of missing bytes.
##
## .. bro:see:: file_new file_over_new_connection file_timeout file_mime_type
## file_mime_types file_state_remove file_reassembly_overflow
## .. bro:see:: file_new file_over_new_connection file_timeout
## file_metadata_inferred file_state_remove file_reassembly_overflow
event file_gap%(f: fa_file, offset: count, len: count%);
## Indicates that the file had an overflow of the reassembly buffer.
@ -978,10 +969,11 @@ event file_gap%(f: fa_file, offset: count, len: count%);
## file data and get back under the reassembly buffer size limit.
## This value will also be represented as a gap.
##
## .. bro:see:: file_new file_over_new_connection file_timeout file_mime_type
## file_mime_types file_state_remove file_gap Files::enable_reassembler
## Files::reassembly_buffer_size Files::enable_reassembly
## Files::disable_reassembly Files::set_reassembly_buffer_size
## .. bro:see:: file_new file_over_new_connection file_timeout
## file_metadata_inferred file_state_remove file_gap
## Files::enable_reassembler Files::reassembly_buffer_size
## Files::enable_reassembly Files::disable_reassembly
## Files::set_reassembly_buffer_size
event file_reassembly_overflow%(f: fa_file, offset: count, skipped: count%);
## This event is generated each time file analysis is ending for a given file.
@ -989,7 +981,7 @@ event file_reassembly_overflow%(f: fa_file, offset: count, skipped: count%);
## f: The file.
##
## .. bro:see:: file_new file_over_new_connection file_timeout file_gap
## file_mime_type file_mime_types
## file_metadata_inferred
event file_state_remove%(f: fa_file%);
## Generated when an internal DNS lookup produces the same result as last time.

View file

@ -53,31 +53,35 @@ int File::overflow_bytes_idx = -1;
int File::timeout_interval_idx = -1;
int File::bof_buffer_size_idx = -1;
int File::bof_buffer_idx = -1;
int File::meta_mime_type_idx = -1;
int File::meta_mime_types_idx = -1;
void File::StaticInit()
{
if ( id_idx != -1 )
return;
id_idx = Idx("id");
parent_id_idx = Idx("parent_id");
source_idx = Idx("source");
is_orig_idx = Idx("is_orig");
conns_idx = Idx("conns");
last_active_idx = Idx("last_active");
seen_bytes_idx = Idx("seen_bytes");
total_bytes_idx = Idx("total_bytes");
missing_bytes_idx = Idx("missing_bytes");
overflow_bytes_idx = Idx("overflow_bytes");
timeout_interval_idx = Idx("timeout_interval");
bof_buffer_size_idx = Idx("bof_buffer_size");
bof_buffer_idx = Idx("bof_buffer");
id_idx = Idx("id", fa_file_type);
parent_id_idx = Idx("parent_id", fa_file_type);
source_idx = Idx("source", fa_file_type);
is_orig_idx = Idx("is_orig", fa_file_type);
conns_idx = Idx("conns", fa_file_type);
last_active_idx = Idx("last_active", fa_file_type);
seen_bytes_idx = Idx("seen_bytes", fa_file_type);
total_bytes_idx = Idx("total_bytes", fa_file_type);
missing_bytes_idx = Idx("missing_bytes", fa_file_type);
overflow_bytes_idx = Idx("overflow_bytes", fa_file_type);
timeout_interval_idx = Idx("timeout_interval", fa_file_type);
bof_buffer_size_idx = Idx("bof_buffer_size", fa_file_type);
bof_buffer_idx = Idx("bof_buffer", fa_file_type);
meta_mime_type_idx = Idx("mime_type", inferred_file_metadata_type);
meta_mime_types_idx = Idx("mime_types", inferred_file_metadata_type);
}
File::File(const string& file_id, const string& source_name, Connection* conn,
analyzer::Tag tag, bool is_orig)
: id(file_id), val(0), file_reassembler(0), stream_offset(0),
reassembly_max_buffer(0), did_mime_type(false),
reassembly_max_buffer(0), did_metadata_inference(false),
reassembly_enabled(false), postpone_timeout(false), done(false),
analyzers(this)
{
@ -169,11 +173,13 @@ double File::LookupFieldDefaultInterval(int idx) const
return rval;
}
int File::Idx(const string& field)
int File::Idx(const string& field, const RecordType* type)
{
int rval = fa_file_type->FieldOffset(field.c_str());
int rval = type->FieldOffset(field.c_str());
if ( rval < 0 )
reporter->InternalError("Unknown fa_file field: %s", field.c_str());
reporter->InternalError("Unknown %s field: %s", type->GetName().c_str(),
field.c_str());
return rval;
}
@ -281,50 +287,46 @@ void File::SetReassemblyBuffer(uint64 max)
reassembly_max_buffer = max;
}
bool File::DetectMIME()
void File::InferMetadata()
{
did_mime_type = true;
did_metadata_inference = true;
Val* bof_buffer_val = val->Lookup(bof_buffer_idx);
if ( ! bof_buffer_val )
{
if ( bof_buffer.size == 0 )
return false;
return;
BroString* bs = concatenate(bof_buffer.chunks);
bof_buffer_val = new StringVal(bs);
val->Assign(bof_buffer_idx, bof_buffer_val);
}
if ( ! FileEventAvailable(file_metadata_inferred) )
return;
RuleMatcher::MIME_Matches matches;
const u_char* data = bof_buffer_val->AsString()->Bytes();
uint64 len = bof_buffer_val->AsString()->Len();
len = min(len, LookupFieldDefaultCount(bof_buffer_size_idx));
file_mgr->DetectMIME(data, len, &matches);
if ( matches.empty() )
val_list* vl = new val_list();
vl->append(val->Ref());
RecordVal* meta = new RecordVal(inferred_file_metadata_type);
vl->append(meta);
if ( ! matches.empty() )
{
return false;
meta->Assign(meta_mime_type_idx,
new StringVal(*(matches.begin()->second.begin())));
meta->Assign(meta_mime_types_idx,
file_analysis::GenMIMEMatchesVal(matches));
}
if ( FileEventAvailable(file_mime_type) )
{
val_list* vl = new val_list();
vl->append(val->Ref());
vl->append(new StringVal(*(matches.begin()->second.begin())));
FileEvent(file_mime_type, vl);
}
if ( FileEventAvailable(file_mime_types) )
{
val_list* vl = new val_list();
vl->append(val->Ref());
vl->append(file_analysis::GenMIMEMatchesVal(matches));
FileEvent(file_mime_types, vl);
}
return true;
FileEvent(file_metadata_inferred, vl);
return;
}
bool File::BufferBOF(const u_char* data, uint64 len)
@ -357,9 +359,9 @@ void File::DeliverStream(const u_char* data, uint64 len)
// Buffer enough data for the BOF buffer
BufferBOF(data, len);
if ( ! did_mime_type && bof_buffer.full &&
if ( ! did_metadata_inference && bof_buffer.full &&
LookupFieldDefaultCount(missing_bytes_idx) == 0 )
DetectMIME();
InferMetadata();
DBG_LOG(DBG_FILE_ANALYSIS,
"[%s] %" PRIu64 " stream bytes in at offset %" PRIu64 "; %s [%s%s]",
@ -589,7 +591,7 @@ void File::FileEvent(EventHandlerPtr h, val_list* vl)
mgr.QueueEvent(h, vl);
if ( h == file_new || h == file_over_new_connection ||
h == file_mime_type || h == file_mime_types ||
h == file_metadata_inferred ||
h == file_timeout || h == file_extraction_limit )
{
// immediate feedback is required for these events.

View file

@ -230,12 +230,11 @@ protected:
bool BufferBOF(const u_char* data, uint64 len);
/**
* Does mime type detection via file magic signatures and assigns
* strongest matching mime type (if available) to \c mime_type
* field in #val. It uses the data in the BOF buffer.
* @return whether a mime type match was found.
* Does metadata inference (e.g. mime type detection via file
* magic signatures) using data in the BOF (beginning-of-file) buffer
* and raises an event with the metadata.
*/
bool DetectMIME();
void InferMetadata();
/**
* Enables reassembly on the file.
@ -266,10 +265,11 @@ protected:
/**
* Lookup a record field index/offset by name.
* @param field_name the name of the \c fa_file record field.
* @param field_name the name of the record field.
* @param type the record type for which the field will be looked up.
* @return the field offset in #val record corresponding to \a field_name.
*/
static int Idx(const string& field_name);
static int Idx(const string& field_name, const RecordType* type);
/**
* Initializes static member.
@ -282,7 +282,7 @@ protected:
FileReassembler* file_reassembler; /**< A reassembler for the file if it's needed. */
uint64 stream_offset; /**< The offset of the file which has been forwarded. */
uint64 reassembly_max_buffer; /**< Maximum allowed buffer for reassembly. */
bool did_mime_type; /**< Whether the mime type ident has already been attempted. */
bool did_metadata_inference; /**< Whether the metadata inference has already been attempted. */
bool reassembly_enabled; /**< Whether file stream reassembly is needed. */
bool postpone_timeout; /**< Whether postponing timeout is requested. */
bool done; /**< If this object is about to be deleted. */
@ -313,6 +313,9 @@ protected:
static int bof_buffer_idx;
static int mime_type_idx;
static int mime_types_idx;
static int meta_mime_type_idx;
static int meta_mime_types_idx;
};
} // namespace file_analysis

View file

@ -9,7 +9,7 @@
using namespace input;
Component::Component(const std::string& name, factory_callback arg_factory)
: plugin::Component(plugin::component::WRITER, name)
: plugin::Component(plugin::component::READER, name)
{
factory = arg_factory;

View file

@ -117,7 +117,6 @@ SampleLogger* sample_logger = 0;
int signal_val = 0;
int optimize = 0;
int do_notice_analysis = 0;
int rule_bench = 0;
extern char version[];
char* command_line_policy = 0;
vector<string> params;
@ -195,7 +194,6 @@ void usage()
fprintf(stderr, " -F|--force-dns | force DNS\n");
fprintf(stderr, " -I|--print-id <ID name> | print out given ID\n");
fprintf(stderr, " -K|--md5-hashkey <hashkey> | set key for MD5-keyed hashing\n");
fprintf(stderr, " -L|--rule-benchmark | benchmark for rules\n");
fprintf(stderr, " -N|--print-plugins | print available plugins and exit (-NN for verbose)\n");
fprintf(stderr, " -O|--optimize | optimize policy script\n");
fprintf(stderr, " -P|--prime-dns | prime DNS\n");
@ -503,7 +501,6 @@ int main(int argc, char** argv)
{"save-seeds", required_argument, 0, 'H'},
{"set-seed", required_argument, 0, 'J'},
{"md5-hashkey", required_argument, 0, 'K'},
{"rule-benchmark", no_argument, 0, 'L'},
{"print-plugins", no_argument, 0, 'N'},
{"optimize", no_argument, 0, 'O'},
{"prime-dns", no_argument, 0, 'P'},
@ -668,10 +665,6 @@ int main(int argc, char** argv)
hmac_key_set = 1;
break;
case 'L':
++rule_bench;
break;
case 'N':
++print_plugins;
break;