zeek/aux/contrib/scripts/syslog2broccoli.py

600 lines
20 KiB
Python

#! /usr/bin/env python
#
# this script finds the end of the syslog file, and then watches
# for new events to send to Bro. Actually they are reformatted
# as Broccoli events, and written to stdout. Then some other
# process can send them to Bro.
#
# started as a perl script from an unknown source
# modified for syslog parsing by Scott Campbell
# more options added by Brian Tierney
#
# CHANGELOG: started 07/07/05
# fixed IPv6 support in sshd login analysis
# 07/12/05 change logic in ssh deny parsing to only look at what
# we want rather than reverse.
#
"""
Script that looks at interesting entries in a syslog file
and prints out information in a format that Broccoli understands.
"""
import optparse, logging, re, time
import select, socket, sys, threading
# Line classifiers.  Every pattern is applied with .match(), which anchors at
# the start of the line.  The leading character class [\w,\s,\W] contains
# both \w and \W and therefore accepts any character, so each pattern
# effectively means "the line contains <keyword> somewhere".
RE_SSH = re.compile(r"[\w,\s,\W]*sshd")
RE_SSH_ACCEPT = re.compile(r"[\w,\s,\W]*Accept")
# all Failures; leads to false positives
# RE_SSH_FAIL = re.compile(r"[\w,\s,\W]*Failed")
# just failed passwords
RE_SSH_FAIL = re.compile(r"[\w,\s,\W]*Failed[\s]*password")
RE_SSH_FAIL_ILLEGAL_USER = re.compile(r"[\w,\s,\W]*illegal[\s]*user|[\w,\s,\W]*invalid[\s]*user")
RE_SSH_EXCLUDE = re.compile(r"[\w,\s,\W]*com\.apple\.SecurityServer")

# only do failures for now
RE_SUDO = re.compile(r"[\w,\s,\W]*sudo[\w,\s,\W]+failure|[\w,\s,\W]*sudo[\w,\s,\W]+incorrect password attempts")
RE_SUDO_FORMAT1 = re.compile(r"[\w,\s,\W]*sudo[\w,\s,\W]+failure")
RE_SUDO_FORMAT2 = re.compile(r"[\w,\s,\W]*sudo[\w,\s,\W]+incorrect password attempts")

# "[\w,s,\W]" in the third alternative was a typo for "[\w,\s,\W]"; both
# accept any character, so the correction does not change what matches.
RE_SU_SUCCESS = re.compile(r"[\w,\s\W]*su: \(|[\w,\s,\W]*su: SU |[\w,\s,\W]*su[\w,\s,\W]+session opened")
RE_SU_FORMAT1 = re.compile(r"[\w,\s,\W]*session opened for user [\w,\W]+ by [\w,\W]+")
RE_SU_FORMAT2 = re.compile(r"[\w,\s,\W]*su:[\s]*\(to [\w]+\) [\w]+")
RE_SU_FORMAT3 = re.compile(r"[\w,\s,\W]*su: SU")
RE_SU_FAIL = re.compile(r"[\w,\s\W]* BAD SU |[\w,\s,\W]* FAILED SU |[\w,\s,\W]*su[\w,\s,\W]*authentication failure")
RE_SU_FAIL_FORMAT1 = re.compile(r"[\w,\s,\W]*authentication failure")
RE_SU_FAIL_FORMAT2 = re.compile(r"[\w,\s,\W]*FAILED SU")
RE_SU_FAIL_FORMAT3 = re.compile(r"[\w,\s,\W]*BAD SU")

RE_GRID = re.compile(r"[\w,\s\W]* GRAM")
RE_GRID_AUTHORIZE_LOCALUSER = re.compile(r"[\w,\s,\W]* Authorized as local user")
RE_GRID_AUTHORIZE_LOCALUID = re.compile(r"[\w,\s,\W]* Authorized as local uid:")
RE_GRID_AUTHORIZE_LOCALGID = re.compile(r"[\w,\s,\W]* and local gid:")
RE_GRID_AUTHENTICATE = re.compile(r"[\w,\s,\W]* Authenticated globus user:")
RE_GRID_CONNECT = re.compile(r"[\w,\s,\W]* Got connection ")
RE_GRID_SERVICE = re.compile(r"[\w,\s,\W]* Requested service: ")
# The class used to read "[\w,\s,W]" (a literal "W" instead of "\W"), which
# cannot skip punctuation such as the ":" in the syslog timestamp, so the
# pattern never matched a normal syslog line.
RE_GRID_INFO = re.compile(r"[\w,\s,\W]*gridinfo")

# not done: generate Bro event for these too
RE_NEWUSER = re.compile(r"[\w,\s,\W]*new user:[\w,\s,\W]+useradd")
# not done: generate Bro event for user root sending mail to yahoo, gmail, hotmail, aol, etc.
# (maybe even any .com ?)
RE_ROOT_EMAIL = re.compile(r"[\w,\s,\W]*sendmail[\w,\s,\W]+root[\w,\s,\W]+to `[\w,\s,\W]+\.com")
class HeartBeatThread(threading.Thread):
    """
    Thread that periodically writes a heartbeat event to stdout.

    The interval between heartbeats is the ``sleep_seconds`` value given to
    the constructor.
    """

    def __init__(self, sleep_seconds):
        """
        sleep_seconds: pause, in seconds, between two heartbeat events
        """
        threading.Thread.__init__(self)
        self._sleeptime = sleep_seconds

    def run(self):
        """
        Emit a heartbeat event, sleep for the configured interval, repeat.

        The local address is resolved once, before the loop starts.  The
        loop never exits on its own.
        """
        addr = socket.gethostbyname(socket.gethostname())
        heartbeat_string = "Syslog_daemon_heartbeat"
        while True:
            time_double = time.time()
            # A parenthesised single-argument print behaves the same under
            # Python 2 and Python 3.  %d renders the float timestamp as a
            # whole number of seconds.
            print("heartbeat_event double=%d addr=%s string=%s"
                  % (time_double, addr, heartbeat_string))
            time.sleep(self._sleeptime)
def time_conversion(month, date, clocktime):
    """
    Convert the three syslog timestamp fields to seconds since the epoch.

    The timestamp carries no year, so the current year is tried first.  If
    that puts the entry more than a day in the future (for example a
    "Dec 31" entry processed just after New Year) the previous year is
    used instead.

    month:     abbreviated month name, e.g. "Jan"
    date:      day of the month, as a string
    clocktime: "HH:MM:SS"

    Returns the local-time timestamp as a float, or 0.0 if the fields
    cannot be parsed.
    """
    max_future_skew = 24 * 60 * 60  # tolerate clock skew of up to one day
    now = time.time()
    this_year = time.localtime(now).tm_year
    stamp_without_year = " ".join((month, date, clocktime))

    fallback = None
    for year in (this_year, this_year - 1):
        try:
            time_tuple = time.strptime("%s %d" % (stamp_without_year, year),
                                       "%b %d %H:%M:%S %Y")
        except ValueError:
            # Unparsable fields, or a date that does not exist in this
            # year (Feb 29); the other candidate year may still work.
            continue
        time_double = time.mktime(time_tuple)
        if time_double <= now + max_future_skew:
            return time_double
        if fallback is None:
            fallback = time_double

    if fallback is not None:
        # Only a future-dated interpretation exists; keep it rather than
        # dropping the timestamp altogether.
        return fallback

    log.error("time.strptime error converting %s %d" % (stamp_without_year, this_year))
    return 0.0
def check_ip(ip):
    """
    Convert a hostname to an IP if necessary, and check that it is valid.

    ip: hostname or dotted-quad IPv4 address

    Returns the dotted-quad address as a string, or "" when the input is
    empty, cannot be resolved, or does not come back as four octets in the
    range 0-255.
    """
    if not ip:
        # Nothing to resolve.  NOTE(review): some resolvers are reported to
        # map the empty name to a wildcard address instead of failing, so
        # do not rely on gethostbyname to reject it.
        return ""
    try:
        resolved = socket.gethostbyname(ip)
    except Exception:
        # "except Exception" rather than a bare except, so that
        # KeyboardInterrupt / SystemExit are not swallowed here.
        log.error("Error converting %s to an IP " % ip)
        return ""

    # If passed something that merely looked like an IP, gethostbyname
    # might not report an error, so verify the result.
    octets = resolved.split('.')
    if len(octets) != 4:
        return ""
    try:
        in_range = all(0 <= int(octet) < 256 for octet in octets)
    except ValueError:
        in_range = False
    return resolved if in_range else ""
def find_user(fields):
    """
    Find the user in a list of "key=value" fields.

    Returns the value of the first "user=" or "ruser=" field whose value is
    not empty, or "unknown" when there is none.  Fields that are not exactly
    one key and one value separated by a single "=" are ignored.
    """
    for field in fields:
        try:
            key, value = field.split('=')
        except (ValueError, AttributeError):
            # ValueError: zero or several "=" in the field;
            # AttributeError: the entry is not a string.
            continue
        if key in ('user', 'ruser') and value != "":
            return value
    return "unknown"
def parse_ssh(line, line_cnt):
    """
    Print the sshd login fields of one syslog line in the broccoli format.

    line:     the raw syslog line
    line_cnt: line number, used only in log messages

    Emits "ssh_login" for accepted logins and "ssh_fail_login" for failed
    passwords, each with the timestamp, remote address, local host address,
    user name and authentication type.

    Note: still needs to handle odd syslog formats, such as (double set of timestamps):
    Jan 1 00:03:44 127.0.0.1 2005-12-31 21:51:10.163447500 isthiswhatyouwant.jay.lbl.gov sshd[] PAM: Authentication failure for ldoolitt from astound-69-42-20-231.ca.astound.net

    There are many different formats, but the following seem fairly consistent:
        for username
        from hostname
    so look for the words "for" and "from", and take the field after each.
    """
    fields = line.split()
    time_double = time_conversion(fields[0], fields[1], fields[2])

    # The remote host is the field following the first "from".
    from_ip = ""
    if "from" in fields:
        from_ip = fields[fields.index("from") + 1]

    # Some addresses look like "::ffff:128.3.60.86": keep what follows the
    # last colon.  NOTE(review): a pure IPv6 address is also reduced to its
    # last group by this; confirm whether such lines need separate handling.
    ip = ""
    if from_ip:
        # An absent "from" field is never handed to the resolver.
        ip = check_ip(from_ip.split(':')[-1])
    lh_ip = check_ip(fields[3])

    success = False
    failed = False
    auth_type = "unknown"
    username = "unknown"

    if RE_SSH_ACCEPT.match(line):
        success = True
        try:
            auth_index = fields.index('Accepted')
            username_index = fields.index('for')
        except ValueError:
            log.error("Error: sshd line with unknown format: line %d,%s" % (line_cnt, line))
            return
        auth_type = fields[auth_index + 1]
        username = fields[username_index + 1]

    if RE_SSH_FAIL.match(line) and not RE_SSH_EXCLUDE.match(line):
        failed = True
        try:
            auth_index = fields.index('Failed')
            username_index = fields.index('for')
        except ValueError:
            log.error("Error: sshd line with unknown format: line %d,%s" % (line_cnt, line))
            return
        auth_type = fields[auth_index + 1]
        if RE_SSH_FAIL_ILLEGAL_USER.match(line):
            # "... for illegal user NAME" / "... for invalid user NAME"
            username = fields[username_index + 3]
        else:
            username = fields[username_index + 1]

    if ip and lh_ip:
        # A parenthesised single-argument print behaves the same under
        # Python 2 and Python 3.
        if success:
            print("ssh_login double=%d addr=%s addr=%s string=%s string=%s"
                  % (time_double, ip, lh_ip, username, auth_type))
        if failed:
            print("ssh_fail_login double=%d addr=%s addr=%s string=%s string=%s"
                  % (time_double, ip, lh_ip, username, auth_type))
    else:
        log.error("Error: sshd line with unknown format: line %d" % (line_cnt))
def parse_sudo(line):
    """
    Print the fields of a failed sudo attempt in the broccoli format.

    Supports these formats
    1. host sudo(pam_unix)[5835]: authentication failure; logname=user uid=0 euid=0 tty=pts/4 ruser= rhost= user=user
    2. host sudo: user: 3 incorrect password attempts ;
       TTY=pts/11 ; PWD=directory COMMAND=/bin/ls

    Emits "failed_sudo" with the timestamp, the host address and the user
    name ("unknown" when it cannot be determined).
    """
    fields = line.split()
    time_double = time_conversion(fields[0], fields[1], fields[2])

    # look for user
    user = "unknown"
    if RE_SUDO_FORMAT1.match(line):
        user = find_user(fields)
    if RE_SUDO_FORMAT2.match(line):
        # Format 2 may write the name as "user:"; drop the separator so the
        # event carries the bare user name.
        user = fields[5].rstrip(':')
    if user == "":
        user = "unknown"

    # check if need to convert to IP addr
    lh_ip = check_ip(fields[3])

    if user == "unknown":
        log.debug("unhandled user in next line")
        log.debug(line)

    # A parenthesised single-argument print behaves the same under
    # Python 2 and Python 3.
    print("failed_sudo double=%d addr=%s string=%s " % (time_double, lh_ip, user))
def parse_su_success(line, line_cnt):
    """
    Print the fields of a successful su in the broccoli format.

    line:     the raw syslog line
    line_cnt: line number, used only in log messages

    This one is hard because there are MANY formats used for this.
    This function handles these 3 formats
    1. session opened for user <name> by <name>
    2. (to root) user
    3. su: SU <name>
    Other known formats that are not handled:
        user to root
        'su root' succeeded for user

    Not quite done: does not always correctly find logname or username.
    Emits "successful_su" with the timestamp, the host address, logname
    and user.
    """
    fields = line.split()
    time_double = time_conversion(fields[0], fields[1], fields[2])
    logname = "unknown"
    user = "unknown"

    if RE_SU_FORMAT1.match(line):
        try:
            index = fields.index('user')
        except ValueError:
            log.error("Error: su line with unknown format: line %d,%s" % (line_cnt, line))
            return
        logname = fields[index + 1]
        user = fields[index + 3]

    if RE_SU_FORMAT2.match(line):
        # Fixed positions: fields[6] is the "root)" of "(to root)".
        logname = fields[6].rstrip(')')
        user = fields[7]

    if RE_SU_FORMAT3.match(line):
        try:
            index = fields.index('SU')
        except ValueError:
            log.error("Error: su line with unknown format: line %d,%s" % (line_cnt, line))
            return
        user = fields[index + 1]

    if user == "unknown":
        log.debug("unhandled case on line: %d " % line_cnt)
        log.debug(line)

    lh_ip = check_ip(fields[3])
    # A parenthesised single-argument print behaves the same under
    # Python 2 and Python 3.
    print("successful_su double=%d addr=%s string=%s string=%s"
          % (time_double, lh_ip, logname, user))
def parse_su_fail(line, line_cnt):
    """
    Print the fields of a failed su in the broccoli format.

    line:     the raw syslog line
    line_cnt: line number, used only in log messages

    This one is hard because there are MANY formats used for this.
    Handled:
    1. authentication failure;
       logname=user uid=uid euid=0 tty= ruser=jason rhost= user=root
    2. FAILED SU (to root) user   -- the field after "(to root)" is reported
       FAILED SU ... to <name>    -- the field after "to" is reported
    3. BAD SU user to root        -- the field before "to" is reported
    Not handled:
        'su root' failed for user

    Emits "failed_su" with the timestamp, the host address and the user
    ("unknown" when it cannot be determined).
    """
    fields = line.split()
    time_double = time_conversion(fields[0], fields[1], fields[2])
    user = "unknown"

    if RE_SU_FAIL_FORMAT1.match(line):
        user = find_user(fields)

    if RE_SU_FAIL_FORMAT2.match(line):
        if '(to' in fields:
            # "(to root) alice": skip over "root)" to reach the calling
            # user.  The field right after "(to" is the target account
            # with a trailing ")", which is what used to be reported.
            user = fields[fields.index('(to') + 2]
        elif 'to' in fields:
            user = fields[fields.index('to') + 1]
        else:
            log.error("su fail: -to- not found: line %d" % line_cnt)

    if RE_SU_FAIL_FORMAT3.match(line):
        if 'to' in fields:
            user = fields[fields.index('to') - 1]
        else:
            log.error("su fail: -to- not found: line %d " % line_cnt)

    if user == "":
        user = "unknown"
    if user == "unknown":
        log.debug("unhandled case on line %d" % line_cnt)
        log.debug(line)

    lh_ip = check_ip(fields[3])
    # A parenthesised single-argument print behaves the same under
    # Python 2 and Python 3.
    print("failed_su double=%d addr=%s string=%s" % (time_double, lh_ip, user))
def parse_gate(line, line_cnt):
    """
    Print the globus gatekeeper fields in the broccoli format.

    line:     the raw syslog line
    line_cnt: line number, used only in log messages

    Exactly one event is emitted per recognised line; the values are taken
    from fixed positions in the whitespace-split line.  Unrecognised lines
    are reported at debug level only.

    Not finished.
    """
    fields = line.split()
    time_double = time_conversion(fields[0], fields[1], fields[2])

    # All print calls use a single parenthesised argument, which behaves the
    # same under Python 2 and Python 3.
    if RE_GRID_AUTHORIZE_LOCALUSER.match(line):
        gate_ip = check_ip(fields[3])
        # strip() removes any of the characters in "gatekeeper[]:" from both
        # ends, leaving the pid out of a "gatekeeper[123]:" tag.
        pid = fields[5].strip("gatekeeper[]:")
        user = fields[10]
        print("gatekeeper_local_user addr=%s count=%s string=%s string=Authorized"
              % (gate_ip, pid, user))
    elif RE_GRID_AUTHORIZE_LOCALUID.match(line):
        gate_ip = check_ip(fields[3])
        pid = fields[5].strip("gatekeeper[]:")
        uid = fields[10]
        print("gatekeeper_local_uid addr=%s count=%s string=%s string=Authorized"
              % (gate_ip, pid, uid))
    elif RE_GRID_AUTHORIZE_LOCALGID.match(line):
        gate_ip = check_ip(fields[3])
        pid = fields[5].strip("gatekeeper[]:")
        gid = fields[9]
        # NOTE(review): the gid branch emits "gatekeeper_local_uid"; this
        # looks like a copy/paste slip, but the event name is consumed
        # outside this file, so it is left unchanged -- confirm.
        print("gatekeeper_local_uid addr=%s count=%s string=%s string=Authorized"
              % (gate_ip, pid, gid))
    elif RE_GRID_AUTHENTICATE.match(line):
        # Was a bare print to stdout, which put a line that is not an event
        # into the event stream; diagnostics belong in the log.
        log.debug("authenticate")
        gate_ip = check_ip(fields[3])
        pid = fields[5].strip("gatekeeper[]:")
        dn = " ".join(fields[9:])
        print("gatekeeper_auth_user addr=%s count=%s string=%s string=Authorized"
              % (gate_ip, pid, dn))
    elif RE_GRID_CONNECT.match(line):
        gate_ip = check_ip(fields[3])
        src_ip = check_ip(fields[8])
        pid = fields[5].strip("gatekeeper[]:")
        # NOTE(review): "gateekeeper_connect" is spelled with a double "e";
        # left as is because the receiving side may expect this exact
        # name -- confirm.
        print("gateekeeper_connect double=%d addr=%s addr=%s count=%s"
              % (time_double, gate_ip, src_ip, pid))
    elif RE_GRID_SERVICE.match(line):
        gate_ip = check_ip(fields[3])
        pid = fields[5].strip("gatekeeper[]:")
        service = fields[8]
        print("gatekeeper_service double=%d addr=%s count=%s string=%s"
              % (time_double, gate_ip, pid, service))
    else:
        log.debug("unhandled case on line %d" % line_cnt)
        log.debug(line)
def parse_newuser(line):
    """
    Parse a "new user" syslog entry.  Not finished: no event is emitted yet.

    line: the raw syslog line, or the same line already split into fields.
          The list form is accepted because the dispatcher in this file
          passes line.split(); calling .split() on that list used to raise
          AttributeError.
    """
    fields = line.split() if hasattr(line, "split") else list(line)
    # Computed for the event below, which is not emitted yet.
    time_double = time_conversion(fields[0], fields[1], fields[2])
    lh_ip = check_ip(fields[3])
    # TODO: extract the user name and emit
    #   "new_user double=%d addr=%s string=%s" % (time_double, lh_ip, user)
def parse_root_email(line):
    """
    Parse a "root sent mail" syslog entry.  Not finished: no event is
    emitted yet.

    line: the raw syslog line, or the same line already split into fields.
          The list form is accepted because the dispatcher in this file
          passes line.split(); calling .split() on that list used to raise
          AttributeError.
    """
    fields = line.split() if hasattr(line, "split") else list(line)
    # Computed for the event below, which is not emitted yet.
    time_double = time_conversion(fields[0], fields[1], fields[2])
    lh_ip = check_ip(fields[3])
    # TODO: extract the destination address and emit
    #   "root_email double=%d addr=%s addr=%s" % (time_double, lh_ip, ip)
def _dispatch_line(line, line_cnt):
    """Hand one syslog line to the first parser whose pattern matches it."""
    if RE_SSH.match(line) and (RE_SSH_ACCEPT.match(line) or RE_SSH_FAIL.match(line)):
        parse_ssh(line, line_cnt)
    elif RE_SUDO.match(line):
        parse_sudo(line)
    elif RE_SU_SUCCESS.match(line):
        parse_su_success(line, line_cnt)
    elif RE_SU_FAIL.match(line):
        parse_su_fail(line, line_cnt)
    elif RE_GRID.match(line):
        parse_gate(line, line_cnt)
    elif RE_NEWUSER.match(line):
        # These two parsers split the line themselves; they used to be
        # handed line.split(), which made them fail on every call.
        parse_newuser(line)
    elif RE_ROOT_EMAIL.match(line):
        parse_root_email(line)
    # Non-matching lines are dropped silently: logging them produces too
    # much output, even at debug level.


def _open_or_exit(fname):
    """Open fname for reading; log an error and exit with -1 on failure."""
    try:
        return open(fname)
    except EnvironmentError:
        log.error("Error opening syslog file %s " % (fname))
        sys.exit(-1)


def log_parse(syslog_file, opts):
    """
    Continually parse the log file, and print information to stdout.

    syslog_file: open file object positioned where reading should start
    opts:        parsed command line options; reads begin, begin_tail,
                 path and syslog_file

    With opts.begin the process exits when the end of the file is reached.
    Otherwise the file is tailed indefinitely and, when opts.path is set,
    the file named "<path>/all-<YYYY-MM-DD>" for the new day is opened once
    the day of the month changes.
    """
    line_cnt = 0
    # Tail from the start unless asked to read from the beginning first.
    tail = not (opts.begin_tail or opts.begin)
    day = int(time.strftime("%d"))  # day that program is started
    today = time.strftime("%Y-%m-%d")

    while True:
        try:
            line = syslog_file.readline()
        except Exception:
            log.error("Error reading file. Possibly log file was rotated, so try to reopen ")
            syslog_file.close()
            if opts.path:
                fname = "%s/all-%s" % (opts.path, today)
            else:
                # No archive directory given: reopen the plain syslog file
                # rather than building a path from None.
                fname = opts.syslog_file
            syslog_file = _open_or_exit(fname)
            # Nothing was read in this iteration; do not fall through with
            # a stale (or, on the first pass, undefined) line.
            continue

        if line:
            line_cnt += 1
            if not (line_cnt % 50000):
                log.debug("Processed %d lines" % line_cnt)
            try:
                _dispatch_line(line, line_cnt)
            except Exception:
                # One malformed entry must not stop the daemon, but do not
                # swallow KeyboardInterrupt / SystemExit.
                log.error("Error parsing log file. Corrupt log entry: %s" % line)
        elif opts.begin:
            # End of file and not tailing.
            log.debug("End of file. Num lines = %d. Exiting" % line_cnt)
            # NOTE(review): status 1 is kept from the original although
            # reaching the end of the file is the expected outcome of -b.
            sys.exit(1)
        elif opts.begin_tail and not tail:
            tail = True  # start tailing the file
            log.debug("Reached End of file, now tailing the file")
        # An empty read while tailing is only a poll: it is neither counted
        # as a line nor run through the parsers.

        sys.stdout.flush()

        if tail:
            # go slow if tailing the file
            select.select([], [], [], .01)
            # if tailing the file and path is set,
            # need to roll over to a new file at midnight
            if opts.path:
                check_day = int(time.strftime("%d"))
                if day != check_day:
                    # new day, so open new file
                    syslog_file.close()
                    today = time.strftime("%Y-%m-%d")
                    fname = "%s/all-%s" % (opts.path, today)
                    log.debug("New Day, so opening new syslog file: %s " % (fname))
                    syslog_file = _open_or_exit(fname)
                    day = check_day
                    line_cnt = 0
def log_open(opts):
    """
    Set up logging, open the log file and start parsing it.

    opts: parsed command line options; reads verbose, path, start_date,
          syslog_file, begin and begin_tail

    The file is "<path>/all-<start_date>" when both opts.path and
    opts.start_date are set, otherwise opts.syslog_file.  Reading starts at
    the beginning when opts.begin or opts.begin_tail is set, and at the end
    of the file otherwise.  Exits with status -1 if the file cannot be
    opened.
    """
    # The module-wide logger used by all parsing functions is created here.
    global log
    logging.basicConfig()
    log = logging.getLogger("sys2broccoli")
    if opts.verbose:
        log.setLevel(logging.DEBUG)
    else:
        # NOTSET on a named logger defers to the parent logger's level.
        log.setLevel(logging.NOTSET)

    if opts.path and opts.start_date:
        fname = "%s/all-%s" % (opts.path, opts.start_date)
    else:
        fname = opts.syslog_file

    try:
        syslog_file = open(fname)
    except EnvironmentError:
        # Covers IOError/OSError on Python 2 and OSError on Python 3,
        # without swallowing KeyboardInterrupt as a bare except would.
        log.error("Error opening syslog file %s " % (fname))
        sys.exit(-1)

    if opts.begin or opts.begin_tail:
        log.debug("Will start at the beginning of the file.")
    else:
        syslog_file.seek(0, 2)  # whence=2: position at the end of the file

    log_parse(syslog_file, opts)
def main():
    """
    Read in the command line arguments, start the heartbeat thread, then
    open the log and start parsing it.
    """
    parser = optparse.OptionParser()
    parser.add_option("-b", action="store_true", dest="begin", default=False,
                      help="Start at the beginning of the syslog file, "
                           "and exit when get to the end")
    parser.add_option("-B", action="store_true", dest="begin_tail", default=False,
                      help="Start at the beginning of the syslog file, "
                           "and tail the file when get to the end")
    parser.add_option("-v", "--verbose", action="store_true", dest="verbose",
                      help="be more verbose", default=False)
    parser.add_option("-f", "--file", action="store", dest="syslog_file",
                      help="Location of the syslog file.",
                      default="/var/log/syslog")
    # these are for use on syslog.lbl.gov
    parser.add_option("-d", "--dir", action="store", dest="path",
                      help="Directory of the archived syslog files.")
    parser.add_option("-t", "--date", action="store", dest="start_date",
                      help="Date of file to process.", default=False)
    opts, _unused_args = parser.parse_args()

    # Heartbeat every 900 seconds.  The thread is a daemon so that it does
    # not keep the process alive once the main thread exits.
    heartbeat = HeartBeatThread(900)
    heartbeat.daemon = True  # attribute form of the deprecated setDaemon()
    heartbeat.start()

    log_open(opts)


if __name__ == "__main__":
    main()