mirror of
https://github.com/zeek/zeek.git
synced 2025-10-16 13:38:19 +00:00

This is a larger internal change that moves the analyzer infrastructure to a more flexible model where the available analyzers don't need to be hardcoded at compile time anymore. While currently they actually still are, this will in the future enable external analyzer plugins. For now, it does already add the capability to dynamically enable/disable analyzers from script-land, replacing the old Analyzer::Available() methods. There are three major parts going into this: - A new plugin infrastructure in src/plugin. This is independent of analyzers and will eventually support plugins for other parts of Bro as well (think: readers and writers). The goal is that plugins can be alternatively compiled in statically or loaded dynamically at runtime from a shared library. While the latter isn't there yet, there'll be almost no code change for a plugin to make it dynamic later (hopefully :) - New analyzer infrastructure in src/analyzer. I've moved a number of analyzer-related classes here, including Analyzer and DPM; the latter now renamed to Analyzer::Manager. More will move here later. Currently, there's only one plugin here, which provides *all* existing analyzers. We can modularize this further in the future (or not). - A new script interface in base/framework/analyzer. I think that this will eventually replace the dpm framework, but for now that's still there as well, though some parts have moved over. I've also removed the dpd_config table; ports are now configured via the analyzer framework. For example, for SSH: const ports = { 22/tcp } &redef; event bro_init() &priority=5 { ... Analyzer::register_for_ports(Analyzer::ANALYZER_SSH, ports); } As you can see, the old ANALYZER_SSH constants have moved into an enum in the Analyzer namespace. This is all hardly tested right now, and not everything works yet. There's also a lot more cleanup to do (moving more classes around; removing no longer used functionality; documenting script and C++ interfaces; regression tests). 
But it seems to generally work with a small trace at least. The debug stream "dpm" shows more about the loaded/enabled analyzers. A new option -N lists loaded plugins and what they provide (including those compiled in statically; i.e., right now it outputs all the analyzers). This is all not cast-in-stone yet, for some things we need to see if they make sense this way. Feedback welcome.
333 lines
6.9 KiB
C++
333 lines
6.9 KiB
C++
#include <algorithm>
|
|
|
|
#include "ContentLine.h"
|
|
#include "TCP.h"
|
|
|
|
// Builds a content-line analyzer under the fixed name "CONTENTLINE",
// handing conn and orig on to the TCP_SupportAnalyzer base class.
ContentLine_Analyzer::ContentLine_Analyzer(Connection* conn, bool orig)
	: TCP_SupportAnalyzer("CONTENTLINE", conn, orig)
	{
	// All member state is set up in one shared routine.
	InitState();
	}
|
|
|
|
// Same as the two-argument constructor, except that the analyzer name
// passed to the TCP_SupportAnalyzer base class is supplied by the caller.
ContentLine_Analyzer::ContentLine_Analyzer(const char* name, Connection* conn, bool orig)
	: TCP_SupportAnalyzer(name, conn, orig)
	{
	// All member state is set up in one shared routine.
	InitState();
	}
|
|
|
|
void ContentLine_Analyzer::InitState()
|
|
{
|
|
flag_NULs = 0;
|
|
CR_LF_as_EOL = (CR_as_EOL | LF_as_EOL);
|
|
skip_deliveries = 0;
|
|
skip_partial = 0;
|
|
buf = 0;
|
|
seq_delivered_in_lines = 0;
|
|
skip_pending = 0;
|
|
seq = 0;
|
|
seq_to_skip = 0;
|
|
plain_delivery_length = 0;
|
|
is_plain = 0;
|
|
|
|
InitBuffer(0);
|
|
}
|
|
|
|
void ContentLine_Analyzer::InitBuffer(int size)
|
|
{
|
|
if ( buf && buf_len >= size )
|
|
// Don't shrink the buffer, because it's not clear in that
|
|
// case how to deal with characters in it that no longer fit.
|
|
return;
|
|
|
|
if ( size < 128 )
|
|
size = 128;
|
|
|
|
u_char* b = new u_char[size];
|
|
|
|
if ( buf )
|
|
{
|
|
if ( offset > 0 )
|
|
memcpy(b, buf, offset);
|
|
delete [] buf;
|
|
}
|
|
else
|
|
{
|
|
offset = 0;
|
|
last_char = 0;
|
|
}
|
|
|
|
buf = b;
|
|
buf_len = size;
|
|
}
|
|
|
|
// Frees the line buffer allocated by InitBuffer().
ContentLine_Analyzer::~ContentLine_Analyzer()
	{
	delete [] buf;
	}
|
|
|
|
int ContentLine_Analyzer::HasPartialLine() const
|
|
{
|
|
return buf && offset > 0;
|
|
}
|
|
|
|
void ContentLine_Analyzer::DeliverStream(int len, const u_char* data,
|
|
bool is_orig)
|
|
{
|
|
TCP_SupportAnalyzer::DeliverStream(len, data, is_orig);
|
|
|
|
if ( len <= 0 || SkipDeliveries() )
|
|
return;
|
|
|
|
if ( skip_partial )
|
|
{
|
|
TCP_Analyzer* tcp =
|
|
static_cast<TCP_ApplicationAnalyzer*>(Parent())->TCP();
|
|
|
|
if ( tcp && tcp->IsPartial() )
|
|
return;
|
|
}
|
|
|
|
if ( buf && len + offset >= buf_len )
|
|
{ // Make sure we have enough room to accommodate the new stuff.
|
|
int old_buf_len = buf_len;
|
|
buf_len = ((offset + len) * 3) / 2 + 1;
|
|
|
|
u_char* tmp = new u_char[buf_len];
|
|
for ( int i = 0; i < old_buf_len; ++i )
|
|
tmp[i] = buf[i];
|
|
|
|
delete [] buf;
|
|
buf = tmp;
|
|
|
|
if ( ! buf )
|
|
reporter->InternalError("out of memory delivering endpoint line");
|
|
}
|
|
|
|
DoDeliver(len, data);
|
|
|
|
seq += len;
|
|
}
|
|
|
|
void ContentLine_Analyzer::Undelivered(int seq, int len, bool orig)
|
|
{
|
|
ForwardUndelivered(seq, len, orig);
|
|
}
|
|
|
|
void ContentLine_Analyzer::EndpointEOF(bool is_orig)
|
|
{
|
|
if ( offset > 0 )
|
|
DeliverStream(1, (const u_char*) "\n", is_orig);
|
|
}
|
|
|
|
void ContentLine_Analyzer::SetPlainDelivery(int64_t length)
|
|
{
|
|
if ( length < 0 )
|
|
reporter->InternalError("negative length for plain delivery");
|
|
|
|
plain_delivery_length = length;
|
|
}
|
|
|
|
void ContentLine_Analyzer::DoDeliver(int len, const u_char* data)
|
|
{
|
|
seq_delivered_in_lines = seq;
|
|
|
|
while ( len > 0 && ! SkipDeliveries() )
|
|
{
|
|
if ( (CR_LF_as_EOL & CR_as_EOL) &&
|
|
last_char == '\r' && *data == '\n' )
|
|
{
|
|
// CR is already considered as EOL.
|
|
// Compress CRLF to just one line termination.
|
|
//
|
|
// Note, we test this prior to checking for
|
|
// "plain delivery" because (1) we might have
|
|
// made the decision to switch to plain delivery
|
|
// based on a line terminated with '\r' for
|
|
// which a '\n' then arrived, and (2) we are
|
|
// careful when executing plain delivery to
|
|
// clear last_char once we do so.
|
|
last_char = *data;
|
|
--len; ++data; ++seq;
|
|
++seq_delivered_in_lines;
|
|
}
|
|
|
|
if ( plain_delivery_length > 0 )
|
|
{
|
|
int deliver_plain = min(plain_delivery_length, (int64_t)len);
|
|
|
|
last_char = 0; // clear last_char
|
|
plain_delivery_length -= deliver_plain;
|
|
is_plain = 1;
|
|
|
|
ForwardStream(deliver_plain, data, IsOrig());
|
|
|
|
is_plain = 0;
|
|
|
|
data += deliver_plain;
|
|
len -= deliver_plain;
|
|
if ( len == 0 )
|
|
return;
|
|
}
|
|
|
|
if ( skip_pending > 0 )
|
|
SkipBytes(skip_pending);
|
|
|
|
// Note that the skipping must take place *after*
|
|
// the CR/LF check above, so that the '\n' of the
|
|
// previous line is skipped first.
|
|
if ( seq < seq_to_skip )
|
|
{
|
|
// Skip rest of the data and return
|
|
int64_t skip_len = seq_to_skip - seq;
|
|
if ( skip_len > len )
|
|
skip_len = len;
|
|
|
|
ForwardUndelivered(seq, skip_len, IsOrig());
|
|
|
|
len -= skip_len; data += skip_len; seq += skip_len;
|
|
seq_delivered_in_lines += skip_len;
|
|
}
|
|
|
|
if ( len <= 0 )
|
|
break;
|
|
|
|
int n = DoDeliverOnce(len, data);
|
|
len -= n;
|
|
data += n;
|
|
seq += n;
|
|
}
|
|
}
|
|
|
|
int ContentLine_Analyzer::DoDeliverOnce(int len, const u_char* data)
|
|
{
|
|
const u_char* data_start = data;
|
|
|
|
if ( len <= 0 )
|
|
return 0;
|
|
|
|
for ( ; len > 0; --len, ++data )
|
|
{
|
|
if ( offset >= buf_len )
|
|
InitBuffer(buf_len * 2);
|
|
|
|
int c = data[0];
|
|
|
|
#define EMIT_LINE \
|
|
{ \
|
|
buf[offset] = '\0'; \
|
|
int seq_len = data + 1 - data_start; \
|
|
seq_delivered_in_lines = seq + seq_len; \
|
|
last_char = c; \
|
|
ForwardStream(offset, buf, IsOrig()); \
|
|
offset = 0; \
|
|
return seq_len; \
|
|
}
|
|
|
|
switch ( c ) {
|
|
case '\r':
|
|
// Look ahead for '\n'.
|
|
if ( len > 1 && data[1] == '\n' )
|
|
{
|
|
--len; ++data;
|
|
last_char = c;
|
|
c = data[0];
|
|
EMIT_LINE
|
|
}
|
|
|
|
else if ( CR_LF_as_EOL & CR_as_EOL )
|
|
EMIT_LINE
|
|
|
|
else
|
|
buf[offset++] = c;
|
|
break;
|
|
|
|
case '\n':
|
|
if ( last_char == '\r' )
|
|
{
|
|
--offset; // remove '\r'
|
|
EMIT_LINE
|
|
}
|
|
|
|
else if ( CR_LF_as_EOL & LF_as_EOL )
|
|
EMIT_LINE
|
|
|
|
else
|
|
{
|
|
if ( Conn()->FlagEvent(SINGULAR_LF) )
|
|
Conn()->Weird("line_terminated_with_single_LF");
|
|
buf[offset++] = c;
|
|
}
|
|
break;
|
|
|
|
case '\0':
|
|
if ( flag_NULs )
|
|
CheckNUL();
|
|
else
|
|
buf[offset++] = c;
|
|
break;
|
|
|
|
default:
|
|
buf[offset++] = c;
|
|
break;
|
|
}
|
|
|
|
if ( last_char == '\r' )
|
|
if ( Conn()->FlagEvent(SINGULAR_CR) )
|
|
Conn()->Weird("line_terminated_with_single_CR");
|
|
|
|
last_char = c;
|
|
}
|
|
|
|
return data - data_start;
|
|
}
|
|
|
|
void ContentLine_Analyzer::CheckNUL()
|
|
{
|
|
// If this is the first byte seen on this connection,
|
|
// and if the connection's state is PARTIAL, then we've
|
|
// intercepted a keep-alive, and shouldn't complain
|
|
// about it. Note that for PARTIAL connections, the
|
|
// starting sequence number is adjusted as though there
|
|
// had been an initial SYN, so we check for whether
|
|
// the connection has at most two bytes so far.
|
|
|
|
TCP_Analyzer* tcp =
|
|
static_cast<TCP_ApplicationAnalyzer*>(Parent())->TCP();
|
|
|
|
if ( tcp )
|
|
{
|
|
TCP_Endpoint* endp = IsOrig() ? tcp->Orig() : tcp->Resp();
|
|
if ( endp->state == TCP_ENDPOINT_PARTIAL &&
|
|
endp->LastSeq() - endp->StartSeq() <= 2 )
|
|
; // Ignore it.
|
|
else
|
|
{
|
|
if ( Conn()->FlagEvent(NUL_IN_LINE) )
|
|
Conn()->Weird("NUL_in_line");
|
|
flag_NULs = 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
void ContentLine_Analyzer::SkipBytesAfterThisLine(int64_t length)
|
|
{
|
|
// This is a little complicated because Bro has to handle
|
|
// both CR and CRLF as a line break. When a line is delivered,
|
|
// it's possible that only a CR is seen, and we may not know
|
|
// if an LF is following until we see the next packet. If an
|
|
// LF follows, we should start skipping bytes *after* the LF.
|
|
// So we keep the skip as 'pending' until we see the next
|
|
// character in DoDeliver().
|
|
|
|
if ( last_char == '\r' )
|
|
skip_pending = length;
|
|
else
|
|
SkipBytes(length);
|
|
}
|
|
|
|
void ContentLine_Analyzer::SkipBytes(int64_t length)
|
|
{
|
|
skip_pending = 0;
|
|
seq_to_skip = SeqDelivered() + length;
|
|
}
|
|
|