mirror of
https://github.com/zeek/zeek.git
synced 2025-10-02 06:38:20 +00:00
182 lines
4.8 KiB
Python
Executable file
182 lines
4.8 KiB
Python
Executable file
#!/usr/bin/env python
|
|
import re
|
|
import os
|
|
import sys
|
|
import time
|
|
import string
|
|
import math
|
|
import getopt
|
|
|
|
rawlogs=None
|
|
processedlogs=None
|
|
|
|
# invoke a sed script to remove the last byte from the ips
|
|
def maskit(file):
    """Rewrite ``file`` in place through the ``mask-addr.sed`` sed script.

    ``sed -f mask-addr.sed`` is run over ``file`` (the script is looked up
    relative to the current working directory) with its output captured in
    ``<file>.masked``; that output then replaces ``file``.

    The original file is only replaced after sed has succeeded.  On any
    failure an ``error with ...`` line is printed, the partial ``.masked``
    output is discarded and ``file`` is left untouched.

    Args:
        file: path of the log file to mask.

    Returns:
        None.
    """
    # Local import so this function does not depend on the file's import list.
    import subprocess

    masked = "%s.masked" % file
    # Human-readable form of the operation, used in error messages only.
    cmd = "sed -f mask-addr.sed %s > %s" % (file, masked)

    try:
        with open(masked, "wb") as out:
            # Argument list + explicit stdout: no shell, so odd characters
            # in the file name cannot change the command.
            ret = subprocess.call(["sed", "-f", "mask-addr.sed", file],
                                  stdout=out)
    except (IOError, OSError):
        # Could not create the output file or could not start sed at all.
        ret = -1

    if ret != 0:
        print("error with %s" % cmd)
        try:
            os.remove(masked)
        except OSError:
            # The partial output may never have been created.
            pass
        return

    try:
        # Replaces the original in one step; no separate "rm" is needed.
        os.rename(masked, file)
    except OSError:
        print("error with mv %s %s" % (masked, file))
|
|
|
|
def get_files(dir, myfilter=r'.*\.example$', includezero=False):
    """Return the names of the entries in ``dir`` that match ``myfilter``.

    Args:
        dir: directory to list.
        myfilter: regular expression applied with ``search`` to each entry
            name, case-insensitively.  The default selects names ending in
            ``.example``.
        includezero: when false (the default) entries whose size is zero
            bytes are left out; when true the size is not looked at.

    Returns:
        A list of bare entry names (not joined with ``dir``), in the order
        ``os.listdir`` produced them.

    Raises:
        OSError: if ``dir`` cannot be listed or an entry cannot be stat'ed.
    """
    pattern = re.compile(myfilter, re.IGNORECASE)
    flist = []
    for name in os.listdir(dir):
        if not pattern.search(name):
            continue
        if includezero or os.stat(os.path.join(dir, name)).st_size > 0:
            flist.append(name)
    return flist
|
|
|
|
def sort_conn(f):
    """Sort the lines of ``f`` in place using the external ``sort`` tool.

    The file is first renamed to ``<f>.sortme``, ``sort`` writes its output
    back to ``f``, and the ``.sortme`` copy is removed once the result has
    been checked.  The check compares the byte size before and after; a
    difference of one byte is tolerated (probably a newline added by sort).

    The original size is measured here, from the ``.sortme`` copy, so the
    function does not depend on any variable set by its caller.

    Args:
        f: path of the file to sort.

    Returns:
        None.

    Raises:
        SystemExit: (status 1) if the file cannot be renamed, if ``sort``
            fails, or if the sorted size does not match.  When ``sort``
            fails the untouched original is put back under its own name
            first; on a size mismatch both files are left in place.
    """
    # Local import so this function does not depend on the file's import list.
    import subprocess

    unsorted = f + ".sortme"

    # move to new file
    try:
        os.rename(f, unsorted)
    except OSError:
        print("error with mv %s %s" % (f, unsorted))
        # Nothing to sort; writing to f now would only create an empty file.
        sys.exit(1)

    original_size = os.stat(unsorted).st_size

    # sort it
    try:
        with open(f, "wb") as out:
            ret = subprocess.call(["sort", unsorted], stdout=out)
    except (IOError, OSError):
        # Could not create the output file or could not start sort at all.
        ret = -1
    if ret != 0:
        print("error with sort %s > %s" % (unsorted, f))
        # Put the untouched data back under its original name.
        os.rename(unsorted, f)
        sys.exit(1)

    # we can allow a one byte difference (probably newline)
    sorted_size = os.stat(f).st_size
    if abs(sorted_size - original_size) >= 2:
        print("Error sizes don't match! %d != %d (%s)"
              % (sorted_size, original_size, f))
        sys.exit(1)

    # remove old file (now called .sortme)
    try:
        os.remove(unsorted)
    except OSError:
        print("error with rm %s" % unsorted)
|
|
|
|
def move_it(f, fname):
    """Move ``f`` into the processed-logs directory under the name ``fname``.

    The destination directory is read from the module-level ``processedlogs``
    variable.  The move is done without a shell, so directory or file names
    containing spaces or shell metacharacters are handled correctly.

    Args:
        f: path of the file to move.
        fname: file name to give it inside ``processedlogs``.

    Returns:
        None.  A failed move is reported with an ``error with ...`` line on
        stdout and is not raised to the caller.
    """
    # Local import so this function does not depend on the file's import list.
    import shutil

    # move it on over
    dest = os.path.join(processedlogs, fname)
    try:
        shutil.move(f, dest)
    except (IOError, OSError, shutil.Error):
        print("error with mv %s %s/%s" % (f, processedlogs, fname))
|
|
|
|
def usage(msg=None):
    """Print the command-line help and terminate with exit status 1.

    Args:
        msg: optional extra message (or exception) printed on its own line
            before the help text; nothing extra is printed when it is None.

    Raises:
        SystemExit: always, with status 1.
    """
    help_lines = (
        """process_bro_logs.py -h -l logsdir -r rawlogsdir""",
        """ -h This help message""",
        """ -l logsdir Directory where the logs should end up""",
        """ -r rawlogsdir Directory where the raw logs reside""",
    )
    if msg is not None:
        print(msg)
    for text in help_lines:
        print(text)
    sys.exit(1)
|
|
|
|
####################################################
|
|
# This is the start of the script
|
|
|
|
|
|
# Parse the command line.  -l names the directory for processed logs, -r the
# directory holding the raw logs; any other option (including -h) prints the
# usage text, and usage() exits the script.
try:
    options, prog_args = getopt.getopt(sys.argv[1:], 'hl:r:')
except getopt.GetoptError as E:
    usage(E)

for opt, val in options:
    if opt == '-l':
        processedlogs = val
    elif opt == '-r':
        rawlogs = val
    else:
        usage()

# Both directories are mandatory.
if rawlogs is None or processedlogs is None:
    usage()

# Resolve both directories against the directory the script was started in
# *before* changing directory: they are used again after the chdir below, and
# a relative path would then point somewhere else.
rawlogs = os.path.abspath(rawlogs)
processedlogs = os.path.abspath(processedlogs)

# get to the right place
os.chdir(rawlogs)
|
|
|
|
# look for logs that have been split
# (names of the form <type>.<host>.<yy-mm-dd_HH.MM.SS>.<n>.<n>.<n>)
fl1 = get_files(rawlogs,
                myfilter=r'^(\w+)\.\w+\.(\d{2})-(\d{2})-(\d{2})[-_](\d{2})[:.](\d{2})[:.](\d{2})\.[0-9]+\.[0-9]+\.[0-9]+$')

for f in fl1:
    print("Working on split file:  %s" % f)
    # grab times before we mess with it
    # (stat indexes 6..9 are size, atime, mtime, ctime; `size` stays a
    # module-level name because code outside this loop may rely on it)
    size, atime, mtime, ctime = os.stat(f)[6:10]

    parts = f.split('.')
    logtype, host = parts[0:2]
    # the end marker is the second-to-last dot-separated field of the name
    broend = parts[-2]

    # only sort conn files
    if f[:4] == 'conn':
        sort_conn(f)

    # flush to disk; report a failure against the command that actually ran
    cmd = 'sync'
    ret = os.system(cmd)
    if ret != 0:
        print("error with %s" % cmd)

    # grab the 2nd timestamp: the text before the first '.' on the second
    # line of the file (f matched the filter above, so it is shell-safe)
    cmd = 'head -2 %s | tail -1' % f
    fo = os.popen(cmd)
    try:
        buf = fo.read()
    finally:
        fo.close()
    brostart = buf.split('.')[0].strip()

    # sanity check: must be a 10-digit epoch value of at least 1090000000
    # (mid-July 2004); compare as a number, not as a string
    if not (brostart.isdigit() and len(brostart) == 10
            and int(brostart) >= 1090000000):
        print("File error! Stopping")
        sys.exit(1)

    # construct new filename
    fname = "%s.%s.%s-%s" % (logtype, host, brostart, broend)

    # does a file with name already exist?
    if os.access("%s/%s" % (processedlogs, fname), os.F_OK):
        print("File %s already exists" % fname)
        print("Skipping %s" % fname)
        continue

    move_it(f, fname)
    # give the moved file the original modification time (for atime too)
    os.utime("%s/%s" % (processedlogs, fname), (mtime, mtime))
    print("Done with %s" % f)
    # lets not run too fast
    time.sleep(3)
|
|
|
|
# look for files that haven't been split
# (names of the form <type>.<host>.<yy-mm-dd_HH.MM.SS>)
fl2 = get_files(rawlogs,
                myfilter=r'^(\w+)\.\w+\.(\d{2})-(\d{2})-(\d{2})[-_](\d{2})[:.](\d{2})[:.](\d{2})$')

for f in fl2:
    print("Working on file:  %s" % f)
    # grab times before we mess with it
    # (stat indexes 6..9 are size, atime, mtime, ctime; `size` stays a
    # module-level name because code outside this loop may rely on it)
    size, atime, mtime, ctime = os.stat(f)[6:10]

    # <type>.<host>.<timestamp>; the timestamp itself may contain dots
    logtype, host, brostart = f.split(".", 2)

    # The filter above accepts two-digit fields separated by any of - _ : .
    # so split on those and parse with %y (two-digit year) instead of a
    # fixed '%Y-%m-%d_%H.%M.%S' layout that such names can never satisfy.
    # NOTE(review): assumes the field order is year-month-day -- confirm
    # against the tool that names these files.
    fields = re.split(r'[-_:.]', brostart)
    try:
        foo = list(time.strptime(" ".join(fields), '%y %m %d %H %M %S'))
    except ValueError:
        print("Can't parse timestamp in %s, skipping" % f)
        continue

    # toggle guessing of daylight savings, grrrr
    foo[-1] = -1
    # mktime needs a tuple (a list is rejected on Python 3)
    bs = time.mktime(tuple(foo))
    fname = "%s.%s.%d-%s" % (logtype, host, bs, mtime)

    if os.access("%s/%s" % (processedlogs, fname), os.F_OK):
        print("File %s already exists, skipping" % fname)
        continue

    # sort conn files
    if f[:4] == 'conn':
        sort_conn(f)

    move_it(f, fname)
    # give the moved file the original modification time (for atime too)
    os.utime("%s/%s" % (processedlogs, fname), (mtime, mtime))
    print("Done with %s (%s)" % (fname, f))
    # lets not overrun things
    time.sleep(3)
|