zeek/scripts/perl/lib/Bro/Log/Conn.pm

773 lines
13 KiB
Perl

# Parser and accessor functions for single lines of a Bro conn log.
# All functions in this package are plain subroutines (not methods).
package Bro::Log::Conn;
require 5.006_001;
use strict;
# Package globals; callers may override $NULL_VALUE and $DEBUG.
use vars qw( $VERSION
$NULL_VALUE
$DEBUG );
# $Id: Conn.pm 1426 2005-09-30 00:19:18Z rwinslow $
$VERSION = 1.20;
# Value handed back by duration, srcbytes and dstbytes in place of a '?' field.
$NULL_VALUE = -1;
# Verbosity of the warn() diagnostics in timerange; it tests > 0, > 1 and > 2.
$DEBUG = 0;
# Field separator handed to split() in new().
my $CONN_SPLIT_PATT = ' ';
# my $CONN_SPLIT_PATT = qr/ /o;
# Map data descriptions to subroutine names.
# Every field has a long and a short spelling; both must resolve to the same
# accessor.  output() looks field names up in this table.
my %DATA_MAP = (
    timestamp         => \&timestamp,
    duration          => \&duration,
    source_ip         => \&srcip,
    srcip             => \&srcip,
    destination_ip    => \&dstip,
    dstip             => \&dstip,
    service           => \&service,
    source_port       => \&srcport,
    srcport           => \&srcport,
    destination_port  => \&dstport,
    dstport           => \&dstport,
    protocol          => \&protocol,
    source_bytes      => \&srcbytes,
    srcbytes          => \&srcbytes,
    # Previously pointed at srcbytes, so 'destination_bytes' reported the
    # source byte count.
    destination_bytes => \&dstbytes,
    dstbytes          => \&dstbytes,
    connection_status => \&connstat,
    connstat          => \&connstat,
    source_network    => \&srcnetwork,
    srcnetwork        => \&srcnetwork,
    other             => \&other,
);
sub new
{
    # Split one conn log line into its fields.
    #
    # Argument: a reference to the string holding the log line.
    # Returns a reference to an array of up to 13 fields, or undef when no
    # argument was given or the line has fewer than 12 fields.
    #
    # Field positions in the returned array:
    #    0 timestamp         5 source port        10 connection status
    #    1 duration          6 destination port   11 source network
    #    2 source ip         7 protocol           12 other
    #    3 destination ip    8 source bytes
    #    4 service           9 destination bytes
    my( $line_ref ) = @_;
    return( undef ) unless $line_ref;

    # A limit of 13 keeps everything after the twelfth separator together in
    # the trailing "other" field.
    my @fields = split( $CONN_SPLIT_PATT, ${$line_ref}, 13 );

    return( defined( $fields[11] ) ? \@fields : undef );
}
sub output
{
    # Extract a selection of fields from a parsed connection.
    #
    # Arguments:
    #   $_[0]  array ref as returned by new()
    #   $_[1]  optional array ref of field names, in the order wanted; anything
    #          that is not an array ref selects all thirteen fields
    # Returns the field values as a list in list context, or joined by single
    # spaces otherwise.  Returns undef when no data is given or when a
    # requested field name is not known.
    my( $data, $format ) = @_;
    return undef unless $data;
    $format ||= '';

    unless( ref( $format ) eq 'ARRAY' )
    {
        $format = [ qw( timestamp duration srcip dstip service srcport dstport
                        protocol srcbytes dstbytes connstat srcnetwork other ) ];
    }

    my @values;
    foreach my $field_name ( @{$format} )
    {
        return( undef ) unless exists( $DATA_MAP{$field_name} );

        # Each accessor is called in scalar context and yields one value
        push( @values, scalar( $DATA_MAP{$field_name}->( $data ) ) );
    }

    return( wantarray ? @values : join( ' ', @values ) );
}
sub timestamp
{
    # Return the timestamp (field 0) of a parsed connection, or undef when
    # no data is given.
    my( $data ) = @_;
    return( undef ) unless $data;
    return $data->[0];
}
sub duration
{
    # Return the duration (field 1) of a parsed connection.
    #
    # Arguments:
    #   $_[0]  array ref as returned by new()
    #   $_[1]  optional; the string 'raw' returns the field untouched
    # Unless 'raw' is asked for, a '?' field is replaced by $NULL_VALUE when
    # $NULL_VALUE is defined.  Returns undef when no data is given.
    my( $data, $arg1 ) = @_;
    return undef unless $data;
    $arg1 ||= 0;

    my $value = $data->[1];
    return( $value ) if $arg1 eq 'raw';
    return( $NULL_VALUE ) if $value eq '?' and defined( $NULL_VALUE );
    return( $value );
}
sub source_ip
{
    # Long-name alias for srcip; forwards all arguments.
    return srcip( @_ );
}
sub srcip
{
    # Return the source IP (field 2) of a parsed connection.
    return $_[0][2];
}
sub destination_ip
{
    # Long-name alias for dstip; forwards all arguments.
    return dstip( @_ );
}
sub dstip
{
    # Return the destination IP (field 3) of a parsed connection.
    return $_[0][3];
}
sub service
{
    # Return the service (field 4) of a parsed connection.
    return $_[0][4];
}
sub source_port
{
    # Long-name alias for srcport; forwards all arguments.
    return srcport( @_ );
}
sub srcport
{
    # Return the source port (field 5) of a parsed connection.
    return $_[0][5];
}
sub destination_port
{
    # Long-name alias for dstport; forwards all arguments.
    return dstport( @_ );
}
sub dstport
{
    # Return the destination port (field 6) of a parsed connection.
    return $_[0][6];
}
sub protocol
{
    # Return the protocol (field 7) of a parsed connection.
    return $_[0][7];
}
sub source_bytes
{
    # Long-name alias for srcbytes; forwards all arguments.
    return srcbytes( @_ );
}
sub srcbytes
{
    # Return the source byte count (field 8) of a parsed connection.
    #
    # Arguments:
    #   $_[0]  array ref as returned by new()
    #   $_[1]  optional; the string 'raw' returns the field untouched
    # Unless 'raw' is asked for:
    #   - a '?' field is replaced by $NULL_VALUE when $NULL_VALUE is defined
    #   - the count is only reported for connections in state 'SF'; any
    #     other state yields $NULL_VALUE
    # Returns undef when no data is given.
    my( $data, $arg1 ) = @_;
    return undef unless $data;
    $arg1 ||= 0;

    my $bytes = $data->[8];
    return( $bytes ) if $arg1 eq 'raw';
    return( $NULL_VALUE ) if $bytes eq '?' and defined( $NULL_VALUE );

    # safest to only count sessions with normal termination
    return( $data->[10] eq 'SF' ? $bytes : $NULL_VALUE );
}
sub destination_bytes
{
    # Long-name alias for dstbytes; forwards all arguments.
    return dstbytes( @_ );
}
sub dstbytes
{
    # Return the destination byte count (field 9) of a parsed connection.
    #
    # Arguments:
    #   $_[0]  array ref as returned by new()
    #   $_[1]  optional; the string 'raw' returns the field untouched
    # Unless 'raw' is asked for:
    #   - a '?' field is replaced by $NULL_VALUE when $NULL_VALUE is defined
    #   - the count is only reported for connections in state 'SF'; any
    #     other state yields $NULL_VALUE
    # Returns undef when no data is given.
    my( $data, $arg1 ) = @_;
    return undef unless $data;
    $arg1 ||= 0;

    my $bytes = $data->[9];
    return( $bytes ) if $arg1 eq 'raw';
    return( $NULL_VALUE ) if $bytes eq '?' and defined( $NULL_VALUE );

    # safest to only count sessions with normal termination
    return( $data->[10] eq 'SF' ? $bytes : $NULL_VALUE );
}
sub connstat
{
    # Return the connection status (field 10) of a parsed connection, or
    # undef when no data is given.
    my( $data ) = @_;
    return undef unless $data;
    return $data->[10];
}
sub source_network
{
    # Long-name alias for srcnetwork; forwards all arguments.
    return srcnetwork( @_ );
}
sub srcnetwork
{
    # Return the source network (field 11) of a parsed connection, or undef
    # when no data is given.
    my( $data ) = @_;
    return undef unless $data;

    # This is the last field when the line has no "other" part, so it may
    # still carry the line ending.  The stored field is trimmed in place.
    chomp( $data->[11] );
    return $data->[11];
}
sub tag
{
    # Collect the tag ids ('@' followed by digits) found in the "other"
    # field (field 12) of a parsed connection.
    #
    # Returns the ids in order of appearance: as a list in list context,
    # as an array ref otherwise.  Returns undef when no data is given or no
    # tag id is present.  The stored field is left untouched.
    my( $data ) = @_;
    return( undef ) unless $data;

    # Scan a copy so the match position of the caller's field is not disturbed
    my $scan_text = $data->[12];
    my @tag_ids;
    while( $scan_text =~ m/(\@[[:digit:]]+)/g )
    {
        push( @tag_ids, $1 );
    }

    return( undef ) unless @tag_ids;
    return( wantarray ? @tag_ids : \@tag_ids );
}
sub other
{
    # Return the trailing "other" field (field 12) of a parsed connection,
    # or undef when no data is given.
    my( $data ) = @_;
    return undef unless $data;

    # Strip any line ending; the stored field is trimmed in place
    chomp( $data->[12] );
    return $data->[12];
}
sub timerange
{
    # Find the most likely beginning and ending times covered by a given
    # conn file.
    #
    # Arguments:
    #   $_[0]         path of the conn log file
    #   $_[1], $_[2]  reserved for a start/end time to search for; accepted
    #                 for compatibility, not used by this implementation
    #
    # Returns ( $start_time, $end_time ).  Returns undef when the file is
    # missing or empty, when it cannot be opened, when the scan dies, or
    # when the scan times out before both ends of the range are known.
    # If nothing usable is found without an error, the filler values
    # 9999999999 and/or -1 are returned.
    #
    # Only the head and the tail of the file are examined, and the whole scan
    # is limited to 90 seconds by alarm().
    my $sub_name = 'timerange';
    my $filename = $_[0];

    my $start_filler = 9999999999;
    my $end_filler = -1;
    my $start_time = $start_filler;
    my $end_time = $end_filler;
    my $max_start_lines = 10000;
    my $max_end_lines = 10000;
    my $max_line_length = 5000;
    my $f_size = ( stat( $filename ) )[7] || 0;
    my $default_start;
    my $default_end;
    my $open_failed = 0;

    # Connection states that mark a complete connection.  The alternation is
    # grouped so that both anchors apply to both states.
    my $complete_patt = qr/^(?:SF|REJ)$/;

    if( $DEBUG > 2 )
    {
        warn( __PACKAGE__ . "::$sub_name, Filename: $filename\n" );
    }

    # If the file is zero size then don't even bother continuing
    if( $f_size < 1 )
    {
        if( $DEBUG > 2 )
        {
            warn( __PACKAGE__ . "::$sub_name, File is zero size, skipping\n" );
        }
        return( undef );
    }

    eval {
        local $SIG{ALRM} = sub { die( "Alarm Timeout\n" ) };
        alarm 90;

        # A return inside eval only leaves the eval block, so failures are
        # recorded in $open_failed and acted upon after the eval.
        my $head_fh;
        if( !open( $head_fh, '<', $filename ) )
        {
            if( $DEBUG > 0 )
            {
                warn( __PACKAGE__ . "::$sub_name, Unable to open file '$filename'.\n" );
            }
            $open_failed = 1;
            return;
        }

        my $s_idx = 0;          # start line counter
        my $s_no_change = 0;    # start no change counter

        # Set the very first connection timestamp to $default_start
        while( !$default_start and defined( my $line = <$head_fh> ) )
        {
            if( my $conn_line = new( \$line ) )
            {
                $default_start = timestamp( $conn_line );
            }
        }

        # Find the smallest timestamp in the next $max_start_lines lines where
        # the connection is complete (SF) or (REJ) and the duration is less
        # than .1 seconds.  The scan stops early after 20 such lines have
        # failed to lower the value.
        # NOTE(review): duration() yields $NULL_VALUE (-1 by default) for an
        # unknown duration, which also passes the "< 0.1" test here; the tail
        # scan below additionally requires ">= 0".  Confirm whether the
        # difference is intended.
        while( ( $s_idx < $max_start_lines ) and
               ( $s_no_change < 20 ) and
               defined( my $ln = <$head_fh> ) )
        {
            if( my $conn_line = new( \$ln ) )
            {
                if( connstat( $conn_line ) =~ $complete_patt
                    and duration( $conn_line ) < 0.1 )
                {
                    my $w_timestamp = timestamp( $conn_line );
                    if( $w_timestamp < $start_time )
                    {
                        $start_time = $w_timestamp;
                        $s_no_change = 0;
                    }
                    else
                    {
                        ++$s_no_change;
                    }
                }
            }
            ++$s_idx;
        }
        close( $head_fh );

        # Find the largest timestamp at the end of the file.
        # The file is walked backwards one byte at a time until 20 lines with
        # a status of "SF" or "REJ" have been seen.  Every line is examined
        # but the "SF" or "REJ" lines are the only ones that give a good
        # picture as to the time state of the file.
        my $tail_fh;
        if( !sysopen( $tail_fh, $filename, 0 ) )
        {
            if( $DEBUG > 0 )
            {
                warn( __PACKAGE__ . "::$sub_name, Unable to open file '$filename' with sysread.\n" );
            }
            $open_failed = 1;
            return;
        }

        sysseek( $tail_fh, $f_size, 0 );
        my $cur_pos = sysseek( $tail_fh, 0, 1 );
        my $nl_pos = $cur_pos;  # position just past the line being looked for
        my $matched_count = 0;
        my $line_count = 0;

        while( $matched_count < 20 and
               $line_count < $max_end_lines )
        {
            my $new_line_found = 0;

            if( $cur_pos > -1 )
            {
                my $buf;
                sysread( $tail_fh, $buf, 1 );
                if( $buf eq $/ )
                {
                    $new_line_found = 1;
                }
            }
            elsif( $nl_pos > 20 )   # suppress things like blank lines
            {
                # Walked past the beginning of the file: what remains in
                # front of $nl_pos is the first line of the file.
                sysseek( $tail_fh, 0, 0 );
                $new_line_found = 1;
            }
            else
            {
                last;
            }

            if( $new_line_found )
            {
                my $cur_line = '';
                ++$line_count;

                # Make sure that the line is not too large
                # Fix for some funky rsync errors that may occur
                if( $nl_pos - $cur_pos > $max_line_length )
                {
                    # WAY too big, just mark new position and ignore
                }
                else
                {
                    sysread( $tail_fh, $cur_line, $nl_pos - $cur_pos );
                    if( my $conn_line = new( \$cur_line ) )
                    {
                        if( !$default_end )
                        {
                            $default_end = timestamp( $conn_line );
                        }
                        if( duration( $conn_line ) < 0.1 and
                            duration( $conn_line ) >= 0 )
                        {
                            my $w_timestamp = timestamp( $conn_line );
                            if( $w_timestamp > $end_time )
                            {
                                $end_time = $w_timestamp;
                            }
                        }
                        if( connstat( $conn_line ) =~ $complete_patt )
                        {
                            ++$matched_count;
                        }
                    }
                }
                $nl_pos = $cur_pos;
            }

            # The first line of the file has just been dealt with
            last if $cur_pos < 0;

            --$cur_pos;
            if( $cur_pos > -1 )
            {
                sysseek( $tail_fh, $cur_pos, 0 );
            }
        }
        close( $tail_fh );
    };
    alarm 0;
    my $eval_error = $@;

    if( $open_failed )
    {
        return( undef );
    }

    # Make sure that $start_time has something other than the filler value.
    if( $start_time == $start_filler )
    {
        if( $default_start )
        {
            $start_time = $default_start;
            if( $DEBUG > 1 )
            {
                warn( __PACKAGE__ . "::$sub_name, No start_time was found, setting to a default of $default_start\n" );
            }
        }
        else
        {
            if( $DEBUG > 1 )
            {
                warn( __PACKAGE__ . "::$sub_name, No start_time was found and no default_start time was found\n" );
            }
        }
    }

    # Make sure that $end_time has something other than the filler value.
    if( $end_time == $end_filler )
    {
        if( $default_end )
        {
            $end_time = $default_end;
            if( $DEBUG > 1 )
            {
                warn( __PACKAGE__ . "::$sub_name, No end_time was found, setting to a default of $default_end\n" );
            }
        }
        else
        {
            if( $DEBUG > 1 )
            {
                warn( __PACKAGE__ . "::$sub_name, No end_time was found and no default_end time was found\n" );
            }
        }
    }

    if( $DEBUG > 2 )
    {
        warn( " " . __PACKAGE__ . "::$sub_name, Start time: $start_time\n" );
        warn( " " . __PACKAGE__ . "::$sub_name, End time: $end_time\n" );
    }

    if( $eval_error )
    {
        if( $eval_error =~ m/Alarm Timeout/ )
        {
            # Both filler values are true, so they have to be tested for
            # explicitly to notice that an end of the range is still missing.
            if( !( $start_time and $end_time ) or
                $start_time == $start_filler or
                $end_time == $end_filler )
            {
                if( $DEBUG > 0 )
                {
                    warn( __PACKAGE__ . "::$sub_name, Error occurred in trying to read the file $filename\n" );
                }
                return( undef );
            }
            else
            {
                if( $DEBUG > 0 )
                {
                    warn( __PACKAGE__ . "::$sub_name, Timed out during file read. The first and last timestamps have been set as the range of time available\n" );
                }
            }
        }
        else
        {
            warn( $eval_error );
            return( undef );
        }
    }

    return( $start_time, $end_time );
}
sub containstag
{
    # Check whether a parsed connection carries any of the given tag ids.
    #
    # Arguments: the array ref returned by new(), followed by the tag ids
    # to look for (for example '@12').
    # Returns the first matching tag id, trying the wanted ids in the order
    # given; 0 when the connection has no tags or none match; undef when no
    # data is given.
    my( $data, @wanted_tags ) = @_;
    return( undef ) unless $data;

    # tag() hands back an array ref in scalar context, undef without tags
    my $conn_tags = tag( $data );
    return( 0 ) unless $conn_tags;

    foreach my $wanted ( @wanted_tags )
    {
        foreach my $present ( @{$conn_tags} )
        {
            return( $present ) if $present eq $wanted;
        }
    }

    return( 0 );
}
sub startposition
{
    # Find the first file position where $timestamp is greater than or equal to
    # a timestamp in the file.
    #
    # Placeholder: no search is implemented.  The timestamp argument is
    # simply handed back to the caller.
    my( $timestamp ) = @_;
    return $timestamp;
}
sub endposition
{
    # Find the last file position where $timestamp is less than or equal to
    # a timestamp in a file.
    #
    # Placeholder: no search is implemented.  The timestamp argument is
    # simply handed back to the caller.
    my( $timestamp ) = @_;
    return $timestamp;
}
sub connectsucceed
{
    # Decide whether a parsed connection was successfully established.
    #
    # Argument: the array ref returned by new().
    # Returns 1 when the connection state is 'SF', or when it is S1, S2 or
    # S3 and both sides sent at least one byte.  Returns 0 for every other
    # state, and undef when no data is given.
    my $data = $_[0] || return( undef );
    my $connstat = connstat( $data );

    if( $connstat eq 'SF' )
    {
        return( 1 );
    }

    if( $connstat =~ m/^S[123]$/ )
    {
        # The raw counts are needed here: without 'raw', srcbytes and
        # dstbytes only report a count for 'SF' connections and hand back
        # $NULL_VALUE for these states, so this test could never succeed.
        foreach my $byte_count ( srcbytes( $data, 'raw' ),
                                 dstbytes( $data, 'raw' ) )
        {
            # An unknown count ('?') does not show that data was sent
            if( !defined( $byte_count ) or
                $byte_count !~ m/^\d+$/ or
                $byte_count == 0 )
            {
                return( 0 );
            }
        }
        return( 1 );
    }

    # connection failed; this includes the remaining S states (S0, SH, SHR),
    # which used to fall off the end without an explicit return value
    return( 0 );
}
sub range
{
    # Report the time span of a parsed connection, or test whether a point
    # in time falls inside it.
    #
    # Arguments:
    #   $_[0]  array ref as returned by new()
    #   $_[1]  optional time to test against the connection
    #   $_[2]  optional error margin, widening the span at both ends; anything
    #          that is not greater than zero counts as 0
    # With a (true) match time: returns 1 when it lies within the span and 0
    # otherwise; a negative duration is taken to be 10.
    # Without: returns ( $start_time, $end_time ), where $end_time is undef
    # when the duration is not greater than -1.
    # Returns undef when no data is given.
    my( $data, $match_time, $error_margin ) = @_;
    return( undef ) unless $data;

    # Make sure that the error margin is greater than zero
    unless( defined( $error_margin ) and $error_margin > 0 )
    {
        $error_margin = 0;
    }

    my $began = timestamp( $data );
    my $length = duration( $data );

    if( !$match_time )
    {
        my $ended;
        $ended = $began + $length if $length > -1;
        return( $began, $ended );
    }

    $length = 10 if $length < 0;
    my $window_close = $began + $length + $error_margin;
    my $window_open = $began - $error_margin;

    if( $match_time >= $window_open and $match_time <= $window_close )
    {
        return( 1 );
    }
    return( 0 );
}
1;
# The args to Bro::Log::Conn::output are the connection array ref returned by
# Bro::Log::Conn::new and an optional array ref of what order and fields
# should be printed.
# EXAMPLE:
# $array_ref = Bro::Log::Conn::new( \$ln );   # new() expects a reference to the log line
# @output_parts = Bro::Log::Conn::output( $array_ref, [ 'srcip', 'dstip', 'timestamp' ] );
#
# The available fields are as follows:
# timestamp
# duration
# srcip
# dstip
# service
# srcport
# dstport
# protocol
# srcbytes
# dstbytes
# connstat
# srcnetwork
# other
# For convenience any data that is represented by a ? will be replaced by a -1
# This occurs for duration, srcbytes, and dstbytes
# This is adjustable by changing $NULL_VALUE