mirror of
https://github.com/zeek/zeek.git
synced 2025-10-02 14:48:21 +00:00
600 lines
20 KiB
Python
600 lines
20 KiB
Python
#! /usr/bin/env python
|
|
#
|
|
# this script finds the end of the syslog file, and then watches
|
|
# for new events to send to Bro. Actually they are reformatted
|
|
# as Broccoli events, and written to stdout. Then some other
|
|
# process can send them to Bro.
|
|
#
|
|
# started as a perl script from an unknown source
|
|
# modified for syslog parsing by Scott Campbell
|
|
# more options added by Brian Tierney
|
|
#
|
|
# CHANGELOG: started 07/07/05
|
|
# fixed IPv6 support in sshd login analysis
|
|
# 07/12/05 change logic in ssh deny parsing to only look at what
|
|
# we want rather than reverse.
|
|
#
|
|
"""
|
|
script that looks at interesting entries in a syslog file
|
|
and print out information in a format that broccoli understands
|
|
"""
|
|
import optparse, logging, re, time
|
|
import select, socket, sys, threading
|
|
|
|
# Patterns used to classify syslog lines.
#
# The recurring prefix "[\w,\s,\W]*" matches any run of characters (\w and \W
# together cover every character, newlines included), so although callers use
# .match() each pattern effectively behaves like a substring search.

RE_SSH = re.compile(r"[\w,\s,\W]*sshd")
RE_SSH_ACCEPT = re.compile(r"[\w,\s,\W]*Accept")
# all Failures; leads to false positives
# RE_SSH_FAIL = re.compile(r"[\w,\s,\W]*Failed")
# just failed passwords
RE_SSH_FAIL = re.compile(r"[\w,\s,\W]*Failed[\s]*password")
RE_SSH_FAIL_ILLEGAL_USER = re.compile(r"[\w,\s,\W]*illegal[\s]*user|[\w,\s,\W]*invalid[\s]*user")
RE_SSH_EXCLUDE = re.compile(r"[\w,\s,\W]*com\.apple\.SecurityServer")

# only do failures for now

RE_SUDO = re.compile(r"[\w,\s,\W]*sudo[\w,\s,\W]+failure|[\w,\s,\W]*sudo[\w,\s,\W]+incorrect password attempts")
RE_SUDO_FORMAT1 = re.compile(r"[\w,\s,\W]*sudo[\w,\s,\W]+failure")
RE_SUDO_FORMAT2 = re.compile(r"[\w,\s,\W]*sudo[\w,\s,\W]+incorrect password attempts")

# The third alternative previously read "[\w,s,\W]+" (missing backslash on
# "s"); the character class already matched everything, so this is a
# readability fix only.
RE_SU_SUCCESS = re.compile(r"[\w,\s\W]*su: \(|[\w,\s,\W]*su: SU |[\w,\s,\W]*su[\w,\s,\W]+session opened")
RE_SU_FORMAT1 = re.compile(r"[\w,\s,\W]*session opened for user [\w,\W]+ by [\w,\W]+")
RE_SU_FORMAT2 = re.compile(r"[\w,\s,\W]*su:[\s]*\(to [\w]+\) [\w]+")
RE_SU_FORMAT3 = re.compile(r"[\w,\s,\W]*su: SU")


RE_SU_FAIL = re.compile(r"[\w,\s\W]* BAD SU |[\w,\s,\W]* FAILED SU |[\w,\s,\W]*su[\w,\s,\W]*authentication failure")
RE_SU_FAIL_FORMAT1 = re.compile(r"[\w,\s,\W]*authentication failure")
RE_SU_FAIL_FORMAT2 = re.compile(r"[\w,\s,\W]*FAILED SU")
RE_SU_FAIL_FORMAT3 = re.compile(r"[\w,\s,\W]*BAD SU")

RE_GRID = re.compile(r"[\w,\s\W]* GRAM")
RE_GRID_AUTHORIZE_LOCALUSER = re.compile(r"[\w,\s,\W]* Authorized as local user")
RE_GRID_AUTHORIZE_LOCALUID = re.compile(r"[\w,\s,\W]* Authorized as local uid:")
RE_GRID_AUTHORIZE_LOCALGID = re.compile(r"[\w,\s,\W]* and local gid:")
RE_GRID_AUTHENTICATE = re.compile(r"[\w,\s,\W]* Authenticated globus user:")
RE_GRID_CONNECT = re.compile(r"[\w,\s,\W]* Got connection ")
RE_GRID_SERVICE = re.compile(r"[\w,\s,\W]* Requested service: ")
# Previously "[\w,\s,W]*gridinfo": without the backslash the prefix could not
# consume punctuation such as the ":" in the timestamp, so the pattern never
# matched an ordinary syslog line.
RE_GRID_INFO = re.compile(r"[\w,\s,\W]*gridinfo")

# not done: generate Bro event for these too
RE_NEWUSER = re.compile(r"[\w,\s,\W]*new user:[\w,\s,\W]+useradd")

# not done: generate Bro event for user root sending mail to yahoo, gmail, hotmail, aol, etc.
# (maybe even any .com ?)
RE_ROOT_EMAIL = re.compile(r"[\w,\s,\W]*sendmail[\w,\s,\W]+root[\w,\s,\W]+to `[\w,\s,\W]+\.com")
|
|
|
|
|
|
class HeartBeatThread(threading.Thread):
    """
    Thread that periodically writes a heartbeat event line to stdout.

    sleep_seconds is the pause between two consecutive heartbeat lines.
    """

    def __init__(self, sleep_seconds):
        super(HeartBeatThread, self).__init__()
        self._sleeptime = sleep_seconds

    def run(self):
        """
        Print a heartbeat event, sleep for the configured interval, repeat.

        The local address is looked up once, before the loop starts; the
        loop itself never terminates.
        """
        local_addr = socket.gethostbyname(socket.gethostname())
        label = "Syslog_daemon_heartbeat"
        while True:
            now = time.time()
            print("heartbeat_event double=%d addr=%s string=%s" % (now, local_addr, label))
            time.sleep(self._sleeptime)
|
|
|
|
def time_conversion(month, date, clocktime):
    """
    Convert a syslog timestamp (e.g. "Jan", "1", "00:03:44") to seconds
    since the epoch, as returned by time.mktime().

    Syslog timestamps carry no year.  The current year is tried first; if
    that yields a time more than one day in the future (for example a
    December entry that is read in January), or does not parse at all
    (Feb 29 in a non-leap year), the previous year is tried instead.

    Returns 0.0 and logs an error when the timestamp cannot be parsed.
    """
    # Tolerance for clock skew between the logging host and this host.
    future_slack_seconds = 86400
    this_year = time.localtime().tm_year
    horizon = time.time() + future_slack_seconds

    fallback = None
    for year in (this_year, this_year - 1):
        candidate = " ".join((month, date, clocktime, str(year)))
        try:
            time_tuple = time.strptime(candidate, "%b %d %H:%M:%S %Y")
        except ValueError:
            continue
        time_double = time.mktime(time_tuple)
        if time_double <= horizon:
            return time_double
        if fallback is None:
            # Parsed, but lies in the future; keep it in case the previous
            # year cannot be parsed at all.
            fallback = time_double

    if fallback is not None:
        return fallback

    time_str = " ".join((month, date, clocktime, str(this_year)))
    log.error("time.strptime error converting %s" % time_str)
    return 0.0
|
|
|
|
def check_ip(ip):
    """
    Convert a hostname to an IP if necessary, and check that the result is
    a valid dotted-quad IPv4 address.

    Returns the address as a string, or "" when the input is empty, cannot
    be resolved, or does not look like four octets in the range 0-255.
    """
    # An empty name must not be handed to the resolver: gethostbyname("")
    # can answer with the wildcard address instead of failing.
    if not ip:
        return ""

    try:
        ip = socket.gethostbyname(ip)
    except (socket.error, UnicodeError, ValueError, TypeError):
        log.error("Error converting %s to an IP " % ip)
        return ""

    # if passed in something that looked like an IP, gethostbyname might not
    # return an error, so best to check
    octets = ip.split('.')
    if len(octets) != 4:
        return ""

    for octet in octets:
        try:
            value = int(octet)
        except ValueError:
            return ""
        if value < 0 or value > 255:
            return ""
    return ip
|
|
|
|
def find_user(fields):
    """
    Find the user in a list of fields where user is the name in user=name.

    The first field of the form "user=<name>" or "ruser=<name>" with a
    non-empty name wins.  Fields that do not contain exactly one "=" are
    ignored.  Returns "unknown" when no such field exists.
    """
    for field in fields:
        try:
            parts = field.split('=')
        except AttributeError:
            # Not a string; skip it rather than abort the whole lookup.
            continue
        if len(parts) != 2:
            continue
        key, value = parts
        if key in ('user', 'ruser') and value != "":
            return value
    return "unknown"
|
|
|
|
def parse_ssh(line, line_cnt):
    """
    Print the ssh fields of one syslog line in the broccoli format.

    line is the raw syslog line; line_cnt is its line number and is only
    used in error messages.  Emits "ssh_login" for accepted logins and
    "ssh_fail_login" for failed passwords; lines that cannot be interpreted
    are reported through the logger and produce no output.

    Note: still needs to handle odd syslog formats, such as (double set of timestamps):
    Jan 1 00:03:44 127.0.0.1 2005-12-31 21:51:10.163447500 isthiswhatyouwant.jay.lbl.gov sshd[] PAM: Authentication failure for ldoolitt from astound-69-42-20-231.ca.astound.net

    There are many different formats, but the following seem fairly consistent:
        for username
        from hostname
    so look for the words "for" and "from", and then take the fields after them.
    """
    fields = line.split()
    if len(fields) < 4:
        # Need at least month, day, time and logging host.
        log.error("Error: sshd line with unknown format: line %d,%s" % (line_cnt, line))
        return
    time_double = time_conversion(fields[0], fields[1], fields[2])

    # look for 'from' hostname; a trailing "from" with nothing after it is
    # treated as "no source host"
    from_ip = ""
    if "from" in fields[:-1]:
        from_ip = fields[fields.index("from") + 1]

    # some addresses look like this: "::ffff:128.3.60.86"; keep the part
    # after the last colon
    ip = from_ip.split(':')[-1]

    # verify that this is a valid IP address; skip the lookup entirely when
    # no source host was found
    if ip:
        ip = check_ip(ip)
    lh_ip = check_ip(fields[3])

    success = False
    failed = False
    auth_type = "unknown"
    username = "unknown"

    try:
        if RE_SSH_ACCEPT.match(line):
            success = True
            auth_index = fields.index('Accepted')
            username_index = fields.index('for')
            auth_type = fields[auth_index + 1]
            username = fields[username_index + 1]

        if RE_SSH_FAIL.match(line) and not RE_SSH_EXCLUDE.match(line):
            failed = True
            auth_index = fields.index('Failed')
            username_index = fields.index('for')
            auth_type = fields[auth_index + 1]
            if RE_SSH_FAIL_ILLEGAL_USER.match(line):
                # "... for illegal user NAME ..." / "... for invalid user NAME ..."
                username = fields[username_index + 3]
            else:
                username = fields[username_index + 1]
    except (ValueError, IndexError):
        # ValueError: expected keyword missing; IndexError: line truncated
        # right after the keyword.
        log.error("Error: sshd line with unknown format: line %d,%s" % (line_cnt, line))
        return

    if ip and lh_ip:
        if success:
            print("ssh_login double=%d addr=%s addr=%s string=%s string=%s" % (time_double, ip, lh_ip, username, auth_type))

        if failed:
            print("ssh_fail_login double=%d addr=%s addr=%s string=%s string=%s" % (time_double, ip, lh_ip, username, auth_type))
    else:
        log.error("Error: sshd line with unknown format: line %d" % (line_cnt))
|
|
|
|
|
|
def parse_sudo(line):
    """
    Print a "failed_sudo" event for one syslog line in the broccoli format.

    Supports these formats
    1. host sudo(pam_unix)[5835]: authentication failure; logname=user uid=0 euid=0 tty=pts/4 ruser= rhost= user=user
    2. host sudo: user: 3 incorrect password attempts ;
       TTY=pts/11 ; PWD=directory COMMAND=/bin/ls
    """
    tokens = line.split()
    stamp = time_conversion(tokens[0], tokens[1], tokens[2])

    # look for user
    who = "unknown"
    if RE_SUDO_FORMAT1.match(line):
        who = find_user(tokens)
    if RE_SUDO_FORMAT2.match(line):
        who = tokens[5]
    if who == "":
        who = "unknown"

    # check if need to convert to IP addr
    host_ip = check_ip(tokens[3])

    if who == "unknown":
        log.debug("unhandled user in next line" )
        log.debug(line)

    print("failed_sudo double=%d addr=%s string=%s " % (stamp, host_ip, who))
|
|
|
|
def parse_su_success(line, line_cnt):
    """
    Print a "successful_su" event for one syslog line in the broccoli format.

    This one is hard because there are MANY formats used for this.
    This function handles these 3 formats
       1. session opened for user by user
       2. (to root) user
       3. su: SU

    Formats seen but not handled include:
       user to root
       'su root' succeeded for user

    Not quite done: does not always correctly find logname or username.
    line_cnt is only used in log messages.
    """
    tokens = line.split()
    stamp = time_conversion(tokens[0], tokens[1], tokens[2])
    login_name = "unknown"
    su_user = "unknown"

    if RE_SU_FORMAT1.match(line):
        if 'user' not in tokens:
            log.error( "Error: su line with unknown format: line %d,%s" % (line_cnt, line))
            return
        pos = tokens.index('user')
        login_name = tokens[pos + 1]
        su_user = tokens[pos + 3]

    if RE_SU_FORMAT2.match(line):
        login_name = tokens[6].rstrip(')')
        su_user = tokens[7]

    if RE_SU_FORMAT3.match(line):
        if 'SU' not in tokens:
            log.error( "Error: su line with unknown format: line %d,%s" % (line_cnt, line))
            return
        su_user = tokens[tokens.index('SU') + 1]

    if su_user == "unknown":
        log.debug("unhandled case on line: %d " % line_cnt)
        log.debug(line)

    host_ip = check_ip(tokens[3])
    print("successful_su double=%d addr=%s string=%s string=%s" % (stamp, host_ip, login_name, su_user))
|
|
|
|
|
|
def parse_su_fail(line, line_cnt):
    """
    Print a "failed_su" event for one syslog line in the broccoli format.

    This one is hard because there are MANY formats used for this, including:
       authentication failure;
           logname=user uid=uid euid=0 tty= ruser=jason rhost= user=root

    We match this case only
       1. BAD SU user to root
    These cases are not handled
       FAILED SU (to root) user
       'su root' failed for user

    line_cnt is only used in log messages.
    """
    tokens = line.split()
    stamp = time_conversion(tokens[0], tokens[1], tokens[2])
    who = "unknown"

    if RE_SU_FAIL_FORMAT1.match(line):
        who = find_user(tokens)

    if RE_SU_FAIL_FORMAT2.match(line):
        # "(to" takes precedence over a bare "to" when both are present.
        if '(to' in tokens:
            who = tokens[tokens.index('(to') + 1]
        elif 'to' in tokens:
            who = tokens[tokens.index('to') + 1]
        else:
            log.error("su fail: -to- not found: line %d" % line_cnt)

    if RE_SU_FAIL_FORMAT3.match(line):
        if 'to' in tokens:
            # the token just before "to"
            who = tokens[tokens.index('to') - 1]
        else:
            log.error("su fail: -to- not found: line %d " % line_cnt)

    if who == "":
        who = "unknown"

    if who == "unknown":
        log.debug("unhandled case on line %d" % line_cnt)
        log.debug(line)

    host_ip = check_ip(tokens[3])

    print("failed_su double=%d addr=%s string=%s" % (stamp, host_ip, who))
|
|
|
|
def parse_gate(line, line_cnt):
    """
    Print the globus gatekeeper fields of one syslog line in the broccoli
    format.

    The first matching message type decides which event is printed; lines
    matching none of them are only reported at debug level.  line_cnt is
    only used in log messages.

    Not finished
    """
    fields = line.split()
    time_double = time_conversion(fields[0], fields[1], fields[2])

    if RE_GRID_AUTHORIZE_LOCALUSER.match(line):
        gate_ip = check_ip(fields[3])
        # strip() removes any of the characters "gatekeeper[]:" from both
        # ends, leaving the pid out of "gatekeeper[1234]:"
        pid = fields[5].strip("gatekeeper[]:")
        user = fields[10]
        print("gatekeeper_local_user addr=%s count=%s string=%s string=Authorized" % (gate_ip, pid, user))

    elif RE_GRID_AUTHORIZE_LOCALUID.match(line):
        gate_ip = check_ip(fields[3])
        pid = fields[5].strip("gatekeeper[]:")
        uid = fields[10]
        print("gatekeeper_local_uid addr=%s count=%s string=%s string=Authorized" % (gate_ip, pid, uid))

    elif RE_GRID_AUTHORIZE_LOCALGID.match(line):
        gate_ip = check_ip(fields[3])
        pid = fields[5].strip("gatekeeper[]:")
        gid = fields[9]
        # NOTE(review): this reports the gid under the "gatekeeper_local_uid"
        # event name; possibly a copy/paste slip, but left unchanged because
        # the consumer of these events is not visible here.
        print("gatekeeper_local_uid addr=%s count=%s string=%s string=Authorized" % (gate_ip, pid, gid))

    elif RE_GRID_AUTHENTICATE.match(line):
        # This marker used to be printed to stdout, where it ended up in the
        # event stream; it is diagnostic output and belongs in the log.
        log.debug("authenticate")
        gate_ip = check_ip(fields[3])
        pid = fields[5].strip("gatekeeper[]:")
        dn = " ".join(fields[9:])
        print("gatekeeper_auth_user addr=%s count=%s string=%s string=Authorized" % (gate_ip, pid, dn))

    elif RE_GRID_CONNECT.match(line):
        gate_ip = check_ip(fields[3])
        src_ip = check_ip(fields[8])
        pid = fields[5].strip("gatekeeper[]:")
        # NOTE(review): "gateekeeper_connect" looks like a typo for
        # "gatekeeper_connect"; confirm against the receiving side before
        # renaming.
        print("gateekeeper_connect double=%d addr=%s addr=%s count=%s" % (time_double, gate_ip, src_ip, pid))

    elif RE_GRID_SERVICE.match(line):
        gate_ip = check_ip(fields[3])
        pid = fields[5].strip("gatekeeper[]:")
        service = fields[8]
        print("gatekeeper_service double=%d addr=%s count=%s string=%s" % (time_double, gate_ip, pid, service))

    else:
        log.debug("unhandled case on line %d" % line_cnt)
        log.debug(line)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def parse_newuser(line):
    """
    Parse a "new user" syslog entry.

    line may be the raw syslog line or an already whitespace-split list of
    its fields.  The timestamp and the logging host are converted, but no
    event is printed yet.

    Not finished
    """
    # Accept both a raw line and a pre-split field list, so a caller that
    # passes line.split() no longer fails with AttributeError.
    if hasattr(line, "split"):
        fields = line.split()
    else:
        fields = list(line)

    time_double = time_conversion(fields[0], fields[1], fields[2])
    lh_ip = check_ip(fields[3])

    # TODO: extract the user name and emit
    #   "new_user double=%d addr=%s string=%s" % (time_double, lh_ip, user)
|
|
|
|
def parse_root_email(line):
    """
    Parse a syslog entry about mail sent by root.

    line may be the raw syslog line or an already whitespace-split list of
    its fields.  The timestamp and the logging host are converted, but no
    event is printed yet.

    Not finished
    """
    # Accept both a raw line and a pre-split field list, so a caller that
    # passes line.split() no longer fails with AttributeError.
    if hasattr(line, "split"):
        fields = line.split()
    else:
        fields = list(line)

    time_double = time_conversion(fields[0], fields[1], fields[2])
    lh_ip = check_ip(fields[3])

    # TODO: extract the destination and emit
    #   "root_email double=%d addr=%s addr=%s" % (time_double, lh_ip, ip)
|
|
|
|
|
|
|
|
def _daily_log_name(opts, day_stamp):
    """Return the path of the archived syslog file for day_stamp (YYYY-MM-DD)."""
    return "%s/all-%s" % (opts.path, day_stamp)


def _open_daily_log(opts, day_stamp):
    """Open the archived syslog file for day_stamp; log and exit(-1) on failure."""
    fname = _daily_log_name(opts, day_stamp)
    try:
        return open(fname)
    except (IOError, OSError):
        log.error( "Error opening syslog file %s " % (fname))
        sys.exit(-1)


def _dispatch_line(line, line_cnt):
    """Hand one syslog line to the first parser whose pattern matches it."""
    if RE_SSH.match(line) and (RE_SSH_ACCEPT.match(line) or RE_SSH_FAIL.match(line)):
        parse_ssh(line, line_cnt)
    elif RE_SUDO.match(line):
        parse_sudo(line)
    elif RE_SU_SUCCESS.match(line):
        parse_su_success(line, line_cnt)
    elif RE_SU_FAIL.match(line):
        parse_su_fail(line, line_cnt)
    elif RE_GRID.match(line):
        parse_gate(line, line_cnt)
    elif RE_NEWUSER.match(line):
        # These parsers split the line themselves, so pass the raw line.
        parse_newuser(line)
    elif RE_ROOT_EMAIL.match(line):
        parse_root_email(line)
    # Lines matching nothing are ignored on purpose; logging them produces
    # too much output even at debug level.


def log_parse(syslog_file, opts):
    """
    Continually parse the log file, and print information to stdout.

    syslog_file is an open file object positioned where reading should
    start.  opts supplies:
        begin       read from the current position and exit at end of file
        begin_tail  read to end of file, then keep tailing
        path        directory of per-day files named "all-YYYY-MM-DD"; when
                    set, tailing switches to the new day's file at midnight

    This function does not return; it exits the process at end of file in
    "begin" mode or when a log file cannot be (re)opened.
    """
    line_cnt = 0
    # With -b / -B we first catch up from the start of the file; otherwise
    # we are tailing from the very beginning.
    tail = not (opts.begin_tail or opts.begin)

    day = int(time.strftime("%d"))      # day that program is started
    today = time.strftime("%Y-%m-%d")

    while True:
        try:
            line = syslog_file.readline()
        except Exception:
            log.error ("Error reading file. Possibly log file was rotated, so try to reopen " )
            syslog_file.close()
            syslog_file = _open_daily_log(opts, today)
            # Nothing was read this round; go around again instead of
            # processing a stale (or not yet assigned) line.
            continue

        if not line:
            # End of file reached.
            if opts.begin:  # if not tailing the file
                log.debug ("End of file. Num lines = %d. Exiting" % line_cnt)
                # NOTE(review): exit status 1 on normal completion is kept
                # from the original; callers may depend on it.
                sys.exit(1)
            if opts.begin_tail and not tail:
                tail = True  # start tailing the file
                log.debug ("Reached End of file, now tailing the file")
        else:
            # Count and parse real lines only, so line numbers in messages
            # are not inflated by empty polls while tailing.
            line_cnt += 1
            if not (line_cnt % 50000):
                log.debug ("Processed %d lines" % line_cnt)

            try:
                _dispatch_line(line, line_cnt)
            except Exception:
                # One malformed entry must not stop the daemon.
                log.error ("Error parsing log file. Corrupt log entry: %s" % line )

        sys.stdout.flush()

        if tail:  # go slow if tailing the file
            select.select([], [], [], .01)
            # if tailing the file and path is set,
            # need to roll over to a new file at midnight
            if opts.path:
                check_day = int(time.strftime("%d"))
                if day != check_day:
                    # new day, so open new file
                    syslog_file.close()
                    today = time.strftime("%Y-%m-%d")
                    log.debug( "New Day, so opening new syslog file: %s " % (_daily_log_name(opts, today)))
                    syslog_file = _open_daily_log(opts, today)
                    day = check_day
                    line_cnt = 0
|
|
|
|
|
|
def log_open(opts):
    """
    Set up logging, open the logfile at the beginning or end depending on
    the command line arguments, and start parsing it.

    opts supplies:
        verbose              enable debug-level log output
        path and start_date  when both are set, open <path>/all-<start_date>
        syslog_file          file to open otherwise
        begin / begin_tail   start reading at the top of the file instead
                             of seeking to its end

    Binds the module-level name "log", which the parsing functions use.
    Exits the process with status -1 when the file cannot be opened.
    """
    global log
    logging.basicConfig()
    log = logging.getLogger("sys2broccoli")

    if opts.verbose:
        log.setLevel(logging.DEBUG)
    else:
        log.setLevel(logging.NOTSET)

    if opts.path and opts.start_date:
        fname = "%s/all-%s" % (opts.path, opts.start_date)
    else:
        fname = opts.syslog_file

    try:
        syslog_file = open(fname)
    except (IOError, OSError):
        # Only I/O failures are expected here; anything else (including
        # Ctrl-C) should propagate instead of being reported as an open error.
        log.error( "Error opening syslog file %s " % (fname))
        sys.exit(-1)

    if opts.begin or opts.begin_tail:
        log.debug("Will start at the beginning of the file.")
    else:
        # whence=2: position at end of file so only new entries are read
        syslog_file.seek(0, 2)

    log_parse(syslog_file, opts)
|
|
|
|
|
|
|
|
def main():
    """
    Parse the command line, start the heartbeat thread, then open the log
    and begin processing it.
    """
    begin_help = ("Start at the begining of the syslog file,\n"
                  "and exit when get to the end")
    begin_tail_help = ("Start at the begining of the syslog file,\n"
                       "and tail the file when get to the end")

    parser = optparse.OptionParser()
    parser.add_option("-b", dest="begin", action="store_true",
                      default=False, help=begin_help)
    parser.add_option("-B", dest="begin_tail", action="store_true",
                      default=False, help=begin_tail_help)
    parser.add_option("-v", "--verbose", dest="verbose", action="store_true",
                      default=False, help="be more verbose")
    parser.add_option("-f", "--file", dest="syslog_file", action="store",
                      default="/var/log/syslog",
                      help="Location of the syslog file.")
    # these are for use on syslog.lbl.gov
    parser.add_option("-d", "--dir", dest="path", action="store",
                      help="Directory of the archived syslog files.")
    parser.add_option("-t", "--date", dest="start_date", action="store",
                      default=False, help="Date of file to process.")
    opts, args = parser.parse_args()

    # Heartbeat every 900 seconds; as a daemon thread it does not keep the
    # process alive once the main thread exits.
    beat = HeartBeatThread(900)
    beat.setDaemon(True)
    beat.start()

    log_open(opts)


if __name__ == "__main__":
    main()
|