zeek/src/http-analyzer.pac
2011-08-04 15:21:18 -05:00

430 lines
10 KiB
JavaScript

%extern{
#include <ctype.h>
// Used by unescape_URI().
extern int is_reserved_URI_char(unsigned char ch);
extern int is_unreserved_URI_char(unsigned char ch);
%}
# Returns a copy of `s` in which every character has been passed through
# toupper().  The copy lives in a buffer allocated here with new[];
# remember to call bytestring::free() on the result.
function to_upper(s: const_bytestring): bytestring
    %{
    const int len = s.length();
    char* buf = new char[len + 1];
    const char* sp = (const char*) s.begin();

    for ( int i = 0; i < len; ++i )
        // The <ctype.h> functions are only defined for EOF and values
        // representable as unsigned char; a plain (possibly signed)
        // char >= 0x80 must therefore be converted first.  toupper()
        // returns non-lowercase input unchanged, so no islower() test
        // is needed.
        buf[i] = (char) toupper((unsigned char) sp[i]);

    buf[len] = '\0';
    return bytestring((uint8*) buf, len);
    %}
# Connection unit for the HTTP analyzer.  It owns one HTTP_Flow per
# direction and carries the BroAnalyzer handle that the flows reach via
# connection()->bro_analyzer() when generating events.
connection HTTP_Conn(bro_analyzer: BroAnalyzer) {
# Flow constructed with is_orig = true.
upflow = HTTP_Flow(true);
# Flow constructed with is_orig = false.
downflow = HTTP_Flow(false);
};
flow HTTP_Flow(is_orig: bool) {
flowunit = HTTP_PDU(is_orig) withcontext (connection, this);
# States.
%member{
// Value parsed by http_header() from a Content-Length header.
int content_length_;
// Body delivery mode chosen by http_header() from the headers seen.
DeliveryMode delivery_mode_;
// Boundary extracted by http_header() from a multipart Content-Type.
bytestring end_of_multipart_;
// network_time() recorded by http_message_begin().
double msg_start_time_;
// Sequence position where the current message starts: 0 initially,
// then flow_buffer_->data_seq() as recorded by http_message_done().
int msg_begin_seq_;
// data_seq() at the end of the first header block of the current
// message; -1 while that point has not been reached.
int msg_header_end_seq_;
// Whether http_header() collects header records into headers_.
bool build_headers_;
// Header records collected since the last gen_http_all_headers().
vector<BroVal> headers_;
%}
%init{
// Start with no known body length or delivery mode.
content_length_ = 0;
delivery_mode_ = UNKNOWN_DELIVERY_MODE;
msg_start_time_ = 0;
msg_begin_seq_ = 0;
// -1 marks "end of headers not seen yet" (see http_end_of_headers).
msg_header_end_seq_ = -1;
// Only collect individual headers if http_all_headers is non-zero at
// the time the flow is initialized.
build_headers_ = (::http_all_headers != 0);
%}
%cleanup{
    end_of_multipart_.free();

    // Header records that are still queued were created by
    // build_http_header_val() but never handed over to a table by
    // build_http_headers_val(), so nothing else will release them.
    for ( unsigned int i = 0; i < headers_.size(); ++i )
        Unref(headers_[i]);
    headers_.clear();
%}
# Returns content_length_, which http_header() sets from a Content-Length
# header (initialized to 0).
function content_length(): int
%{
return content_length_;
%}
# Returns delivery_mode_, the body delivery mode that http_header() derives
# from the Content-Length, Transfer-Encoding and Content-Type headers
# (initialized to UNKNOWN_DELIVERY_MODE).
function delivery_mode(): DeliveryMode
%{
return delivery_mode_;
%}
# Returns end_of_multipart_, the boundary that http_header() extracts from a
# multipart Content-Type header.
function end_of_multipart(): const_bytestring
%{
return end_of_multipart_;
%}
# Methods.
# Handles a parsed request line: generates the http_request event when
# ::http_request is set, then marks the beginning of a new message.
# Always returns true.
function http_request(method: const_bytestring, uri: const_bytestring,
        vers: HTTP_Version): bool
    %{
    if ( ::http_request )
        {
        // unescape_uri() hands back an allocated bytestring, which is
        // released below once the event arguments have been built.
        bytestring decoded_uri = unescape_uri(uri);
        BroAnalyzer analyzer = connection()->bro_analyzer();

        BifEvent::generate_http_request(
            analyzer,
            analyzer->Conn(),
            bytestring_to_val(method),
            bytestring_to_val(uri),
            bytestring_to_val(decoded_uri),
            bytestring_to_val(${vers.vers_str}));

        decoded_uri.free();
        }

    http_message_begin();
    return true;
    %}
# Handles a parsed reply line: generates the http_reply event when
# ::http_reply is set, then marks the beginning of a new message.
# Always returns true.
function http_reply(vers: HTTP_Version, code: int,
        reason: const_bytestring): bool
    %{
    if ( ::http_reply )
        {
        BroAnalyzer analyzer = connection()->bro_analyzer();

        BifEvent::generate_http_reply(
            analyzer,
            analyzer->Conn(),
            bytestring_to_val(${vers.vers_str}),
            code,
            bytestring_to_val(reason));
        }

    http_message_begin();
    return true;
    %}
# Builds a mime_header_rec record for one header: field 0 is the header
# name converted to uppercase, field 1 is the value as given.
function build_http_header_val(name: const_bytestring,
        value: const_bytestring): BroVal
    %{
    StringVal* upper_name;

    if ( name.length() <= 0 )
        // Nothing to copy or convert for an empty name.
        upper_name = new StringVal("");
    else
        {
        upper_name = new StringVal(name.length(),
                                   (const char*) name.begin());
        // Make it all uppercase.
        upper_name->ToUpper();
        }

    RecordVal* rec = new RecordVal(mime_header_rec);
    rec->Assign(0, upper_name);
    rec->Assign(1, bytestring_to_val(value));
    return rec;
    %}
# Extracts the text following "boundary=" (matched case-insensitively) from
# a header value, up to the next ';' or the end of the value.  Returns an
# empty bytestring when no "boundary=" is present; otherwise the result is
# built with the (begin, end) bytestring constructor.
#
# The search is confined to [value.begin(), value.end()): the value is not
# assumed to be NUL-terminated, so the C string functions are not used.
#
# NOTE(review): surrounding quotes (boundary="...") are not stripped, same
# as before -- confirm whether callers need that.
function extract_boundary(value: const_bytestring): bytestring
    %{
    static const char boundary_prefix[] = "boundary=";
    const int prefix_len = (int) sizeof(boundary_prefix) - 1;

    const char* begin = (const char*) value.begin();
    const char* end = (const char*) value.end();

    // Bounded, case-insensitive search for the prefix.  The prefix
    // literal is all lowercase, so lowering the input side suffices.
    const char* boundary_begin = 0;
    for ( const char* p = begin; end - p >= prefix_len; ++p )
        {
        int matched = 0;
        while ( matched < prefix_len &&
                tolower((unsigned char) p[matched]) ==
                    boundary_prefix[matched] )
            ++matched;

        if ( matched == prefix_len )
            {
            boundary_begin = p + prefix_len;
            break;
            }
        }

    if ( ! boundary_begin )
        return bytestring();

    // The boundary runs up to the next parameter separator, if any.
    const char* boundary_end =
        (const char*) memchr(boundary_begin, ';', end - boundary_begin);
    if ( ! boundary_end )
        boundary_end = end;

    return bytestring((const uint8*) boundary_begin,
                      (const uint8*) boundary_end);
    %}
# Returns true if `line` begins with "--" + end_of_multipart_ + "--".
# Anything following that prefix in `line` is not examined.
function is_end_of_multipart(line: const_bytestring): bool
    %{
    const int len = end_of_multipart_.length();

    // Too short to hold both "--" markers plus the boundary.
    if ( line.length() < 4 + len )
        return false;

    // line =?= "--" end_of_multipart_ "--"
    //
    // memcmp() rather than strncmp(): these are byte strings, and
    // strncmp() would stop comparing at the first NUL byte and report
    // a match regardless of what follows it.
    return ( line[0] == '-' && line[1] == '-' &&
             line[len + 2] == '-' && line[len + 3] == '-' &&
             ( len == 0 ||
               memcmp(line.begin() + 2,
                      end_of_multipart_.begin(), len) == 0 ) );
    %}
# Processes one header: updates the body delivery state for
# Content-Length, Transfer-Encoding and Content-Type, generates the
# http_header event when ::http_header is set, and queues a header record
# when build_headers_ is on.  Always returns true.
function http_header(name_colon: const_bytestring,
        value: const_bytestring): bool
    %{
    // Drop the last byte of name_colon (presumably the ':' separator,
    // going by the parameter name) unless it is empty.
    const uint8* name_end = name_colon.end();
    if ( name_colon.length() > 0 )
        --name_end;

    const_bytestring name(name_colon.begin(), name_end);

    if ( bytestring_casecmp(name, "CONTENT-LENGTH") == 0 )
        {
        content_length_ = bytestring_to_int(value, 10);
        delivery_mode_ = CONTENT_LENGTH;
        }

    else if ( bytestring_casecmp(name, "TRANSFER-ENCODING") == 0 )
        {
        if ( bytestring_caseprefix(value, "CHUNKED") )
            delivery_mode_ = CHUNKED;
        }

    else if ( bytestring_casecmp(name, "CONTENT-TYPE") == 0 )
        {
        if ( bytestring_caseprefix(value, "MULTIPART") )
            {
            // Replace any previously stored boundary.
            end_of_multipart_.free();
            end_of_multipart_ = extract_boundary(value);

            // Only switch modes if a boundary was actually found.
            if ( end_of_multipart_.length() > 0 )
                delivery_mode_ = MULTIPART;
            }
        }

    if ( ::http_header )
        {
        BroAnalyzer analyzer = connection()->bro_analyzer();

        BifEvent::generate_http_header(
            analyzer,
            analyzer->Conn(),
            is_orig(),
            bytestring_to_val(name)->ToUpper(),
            bytestring_to_val(value));
        }

    if ( build_headers_ )
        headers_.push_back(build_http_header_val(name, value));

    return true;
    %}
# Builds a mime_header_list table from the queued header records, keyed by
# position starting at 1.  The records in headers_ are assigned into the
# table; headers_ itself is left untouched.
function build_http_headers_val(): BroVal
    %{
    TableVal* table = new TableVal(mime_header_list);
    const unsigned int count = headers_.size();

    // Table positions are 1-based, vector slots are 0-based.
    for ( unsigned int pos = 1; pos <= count; ++pos )
        {
        Val* key = new Val(pos, TYPE_COUNT);
        table->Assign(key, headers_[pos - 1]);
        Unref(key);
        }

    return table;
    %}
# Flushes the queued header records: generates http_all_headers with a
# table of them when ::http_all_headers is set, otherwise releases them.
# headers_ is empty afterwards in either case.
function gen_http_all_headers(): void
    %{
    if ( ::http_all_headers )
        {
        BifEvent::generate_http_all_headers(connection()->bro_analyzer(),
            connection()->bro_analyzer()->Conn(),
            is_orig(),
            build_http_headers_val());
        }
    else
        {
        // No table is built on this path, so the records queued by
        // http_header() would otherwise be dropped without being
        // released.
        for ( unsigned int i = 0; i < headers_.size(); ++i )
            Unref(headers_[i]);
        }

    headers_.clear();
    %}
# Called at the end of a header block.  Flushes the queued headers unless
# the message is chunked, and records where the first header block of the
# message ended.  Always returns true.
function http_end_of_headers(headers: HTTP_Headers): bool
    %{
    // For chunked messages the flush is left to http_message_done(),
    // which emits whatever is still queued.
    const bool flush_now = build_headers_ && delivery_mode_ != CHUNKED;
    if ( flush_now )
        gen_http_all_headers();

    // Only the first set of headers sets the mark; headers that
    // follow chunks find it already set and leave it alone.
    const bool first_header_block = ( msg_header_end_seq_ == -1 );
    if ( first_header_block )
        msg_header_end_seq_ = flow_buffer_->data_seq();

    return true;
    %}
# Records the start time of a message and generates http_begin_entity when
# ::http_begin_entity is set.
function http_message_begin(): void
    %{
    msg_start_time_ = network_time();

    if ( ::http_begin_entity )
        {
        BroAnalyzer analyzer = connection()->bro_analyzer();

        BifEvent::generate_http_begin_entity(
            analyzer, analyzer->Conn(), is_orig());
        }
    %}
# Builds an http_message_stat record for the message that just finished.
# Lengths are differences of flow-buffer sequence positions.
# NOTE(review): the slot order below presumably mirrors the field order
# of the http_message_stat record type -- verify against its declaration.
function build_http_message_stat(): BroVal
    %{
    const int header_len = msg_header_end_seq_ - msg_begin_seq_;
    const int body_len = flow_buffer_->data_seq() - msg_header_end_seq_;
    const bool interrupted = false;

    RecordVal* rec = new RecordVal(http_message_stat);

    // Slot 0: time the message began.
    rec->Assign(0, new Val(msg_start_time_, TYPE_TIME));
    // Slot 1: interrupted flag; always false here.
    rec->Assign(1, new Val(interrupted, TYPE_BOOL));
    // Slot 2: always an empty string here.
    rec->Assign(2, new StringVal(""));
    // Slot 3: body length.
    rec->Assign(3, new Val(body_len, TYPE_COUNT));
    // Slot 4: always zero here.
    rec->Assign(4, new Val(0, TYPE_COUNT));
    // Slot 5: header length.
    rec->Assign(5, new Val(header_len, TYPE_COUNT));

    return rec;
    %}
# Finishes the current message: flushes any headers still queued,
# generates http_end_entity and http_message_done when their handlers are
# set, and resets the per-message state for the next message on this
# flow.  Always returns true.
function http_message_done(pdu: HTTP_PDU): bool
    %{
    // Headers are still queued here if http_end_of_headers() skipped
    // the flush (chunked delivery).
    if ( ! headers_.empty() )
        gen_http_all_headers();

    if ( ::http_end_entity )
        {
        BifEvent::generate_http_end_entity(connection()->bro_analyzer(),
            connection()->bro_analyzer()->Conn(), is_orig());
        }

    if ( ::http_message_done )
        {
        BifEvent::generate_http_message_done(connection()->bro_analyzer(),
            connection()->bro_analyzer()->Conn(),
            is_orig(), build_http_message_stat());
        }

    end_of_multipart_.free();

    // Initialize for next message.
    //
    // The delivery state is derived from this message's headers only.
    // Without resetting it, a following message that carries none of
    // those headers would inherit this message's mode and length
    // (and, for multipart, a boundary that was just freed above).
    content_length_ = 0;
    delivery_mode_ = UNKNOWN_DELIVERY_MODE;

    msg_begin_seq_ = flow_buffer_->data_seq();
    msg_header_end_seq_ = -1;

    return true;
    %}
# Decodes %XX escapes in `uri` into a newly allocated, NUL-terminated
# buffer and returns it as a bytestring.  Malformed escapes and characters
# that are neither reserved nor unreserved URI characters are reported
# through the analyzer's Weird() when an analyzer is available.
#
# Remember to call bytestring::free() on the result
function unescape_uri(uri: const_bytestring): bytestring
    %{
    const u_char* line = uri.begin();
    const u_char* line_end = uri.end();
    BroAnalyzer a = connection()->bro_analyzer();

    // ### Copied from HTTP.cc
    // No branch below writes more bytes than it consumes, so the
    // input length (+1 for the terminating NUL) is always enough.
    byte_vec decoded_URI = new u_char[line_end - line + 1];
    byte_vec URI_p = decoded_URI;

    // An 'unescaped_special_char' here means a character that
    // *should* be escaped, but isn't in the URI.  A control
    // character that appears directly in the URI would be an
    // example.  The RFC implies that if we do not unescape the
    // URI that we see in the trace, every character should be a
    // printable one -- either reserved or unreserved (or '%').
    //
    // Counting the number of unescaped characters and generating
    // a weird event on URI's with unescaped characters (which
    // are rare) will let us locate strange-looking URI's in the
    // trace -- those URI's are often interesting.
    int unescaped_special_char = 0;

    while ( line < line_end )
        {
        if ( *line == '%' )
            {
            ++line;

            if ( line == line_end )
                {
                // How to deal with % at end of line?
                // *URI_p++ = '%';
                if ( a )
                    a->Weird("illegal_%_at_end_of_URI");
                break;
                }

            else if ( *line == '%' )
                {
                // Double '%' might be either due to
                // software bug, or, more likely, an
                // evasion (e.g., used by Nimda).
                // *URI_p++ = '%';
                if ( a )
                    a->Weird("double_%_in_URI");
                --line; // ignore the first '%'
                }

            // Both hex digits must lie inside the URI: without
            // the bounds test, a URI ending in '%' plus a single
            // character would read line[1] one byte past the end.
            // Such a truncated escape now takes the branch below.
            else if ( line + 1 < line_end &&
                      isxdigit(line[0]) && isxdigit(line[1]) )
                {
                *URI_p++ = (decode_hex(line[0]) << 4) +
                           decode_hex(line[1]);
                ++line; // place line at last hex digit
                }

            else
                {
                if ( a )
                    a->Weird("unescaped_%_in_URI");
                *URI_p++ = '%'; // put back initial '%'

                // Take char. without interpretation..
                *URI_p++ = *line;
                }
            }

        else
            {
            if ( ! is_reserved_URI_char(*line) &&
                 ! is_unreserved_URI_char(*line) )
                // Count these up as a way to compress
                // the corresponding Weird event to a
                // single instance.
                ++unescaped_special_char;

            *URI_p++ = *line;
            }

        ++line;
        }

    URI_p[0] = 0;

    if ( unescaped_special_char && a )
        a->Weird("unescaped_special_URI_char");

    return bytestring(decoded_URI, URI_p - decoded_URI);
    %}
};
# Hooks request-line parsing into the flow: adds a &let field whose value
# is the result of the flow's http_request(), called with this type's
# method, uri and version.
refine typeattr HTTP_RequestLine += &let {
process_request: bool =
$context.flow.http_request(method, uri, version);
};
# Hooks reply-line parsing into the flow: adds a &let field whose value is
# the result of the flow's http_reply(), called with this type's version,
# numeric status and reason.
refine typeattr HTTP_ReplyLine += &let {
process_reply: bool =
$context.flow.http_reply(version, status.stat_num, reason);
};
# Hooks each header into the flow: adds a &let field whose value is the
# result of the flow's http_header(), called with this type's name and
# value.
refine typeattr HTTP_Header += &let {
process_header: bool =
$context.flow.http_header(name, value);
};
# Hooks the header block into the flow: adds a &let field whose value is
# the result of the flow's http_end_of_headers(), called with this
# HTTP_Headers unit.
refine typeattr HTTP_Headers += &let {
process_end_of_headers: bool =
$context.flow.http_end_of_headers(this);
};
# Hooks the whole PDU into the flow: adds a &let field whose value is the
# result of the flow's http_message_done(), called with this HTTP_PDU.
refine typeattr HTTP_PDU += &let {
process_message: bool =
$context.flow.http_message_done(this);
};