"""
Parse log data into log_events
"""
import re
import datetime
from log_line import LogTypes, LogLine
class LogParseError(Exception):
    """
    Raised when a log line cannot be parsed.

    The exception message combines the repr of the offending line, its offset and a description of
    the failure, formatted as "<line repr>@<offset>: <message>".
    """

    def __init__(self, line, offset, message):
        """
        Build the exception from the failing line, its offset (may be None) and an error description.
        """

        # assemble the full description first, then hand it to the base Exception
        description = "%r@%s: %s" % (line, offset, message)

        super(LogParseError, self).__init__(description)
class LogParser(object):
    """
    Abstract interface for log parsers.

    Subclasses must implement parse_lines().
    """

    def __init__(self, tz, timestamp_fmt="%H:%M:%S"):
        """
        Setup the parser to use the given format for line timestamps, which are of the given timezone.

        tz is stored as self.tz and timestamp_fmt (a strptime-style format string) as
        self.timestamp_fmt.
        """

        self.tz = tz
        self.timestamp_fmt = timestamp_fmt

    def parse_lines(self, channel, lines, date=None, starting_offset=None):
        """
        Parse the given (iterable) lines of unicode text, without trailing newlines, into log events.

        Channel is the LogChannel that these lines belong to.

        starting_offset is the offset of the first line, and may be None to not use offsets.

        Giving date lets the parser build full timestamps; otherwise, unless line timestamps carry full
        date information, event timestamps will have a date component of 1900/1/1.

        Raises NotImplementedError here; subclasses provide the implementation.
        """

        # explicit abstract-method signal, instead of relying on a NameError from an undefined name
        raise NotImplementedError("%s.parse_lines" % type(self).__name__)
class IrssiParser(LogParser):
    """
    A parser for irssi logfiles.

    Each line is tested against TYPE_REGEXES in order; the first expression that matches decides the
    line's type, and the named groups of that match supply the timestamp, source, target and data
    used to build the LogLine.
    """

    # timestamp prefix, with trailing space
    _TS = r'(?P<timestamp>[a-zA-Z0-9: ]+[a-zA-Z0-9])\s*'

    # subexpression parts
    _NICK = r'(?P<nickname>.+?)'
    _NICK2 = r'(?P<nickname2>.+?)'
    _TARGET = r'(?P<target>.+?)'
    _CHAN = r'(?P<channel>.+?)'
    _CHAN2 = r'(?P<channel2>.+?)'
    _USERHOST = r'(?P<username>.*?)@(?P<hostname>.*?)'
    _MSG = r'(?P<message>.*)'
    _SRV1 = r'(?P<server1>.+?)'
    _SRV2 = r'(?P<server2>.+?)'

    # regular expressions for matching lines, by type; order matters, the first match wins
    #
    # NOTE(review): expressions are applied with re.match() and are not anchored at the end, so a lazy
    # group in final position (e.g. the channel of JOIN) captures a single character. The channel groups
    # are not used when building the LogLine, so this is currently harmless - confirm before using them.
    #
    # NOTE(review): in the NETSPLIT expressions the greedy nick_list group also consumes the space
    # before '(', so the optional count group looks like it can never match - confirm against real logs.
    TYPE_EXPRS = (
        (LogTypes.LOG_OPEN, r'--- Log opened (?P<datetime>.+)'),
        (LogTypes.LOG_CLOSE, r'--- Log closed (?P<datetime>.+)'),
        (LogTypes.MSG, _TS + r'<(?P<flags>.)' + _NICK + r'> ' + _MSG),
        (LogTypes.NOTICE, _TS + r'-' + _NICK + r':' + _CHAN + r'- ' + _MSG),
        (LogTypes.ACTION, _TS + r'\* ' + _NICK + r' ' + _MSG),
        (LogTypes.JOIN, _TS + r'-!- ' + _NICK + r' \[' + _USERHOST + r'\] has joined ' + _CHAN),
        (LogTypes.PART, _TS + r'-!- ' + _NICK + r' \[' + _USERHOST + r'\] has left ' + _CHAN + r' \[(?P<message>.*?)\]'),
        (LogTypes.KICK, _TS + r'-!- ' + _TARGET + r' was kicked from ' + _CHAN + r' by ' + _NICK + r' \[(?P<message>.*?)\]'),
        # XXX: use hostname instead of nickname for ServerMode
        (LogTypes.MODE, _TS + r'-!- (mode|ServerMode)/' + _CHAN + r' \[(?P<mode>.+?)\] by (?P<nickname>\S+)'),
        (LogTypes.NICK, _TS + r'-!- ' + _NICK + r' is now known as (?P<target>\S+)'),
        (LogTypes.QUIT, _TS + r'-!- ' + _NICK + r' \[' + _USERHOST + r'\] has quit \[(?P<message>.*?)\]'),
        (LogTypes.TOPIC, _TS + r'-!- (' + _NICK + r' changed the topic of ' + _CHAN + r' to: (?P<topic>.*)|Topic unset by ' + _NICK2 + r' on ' + _CHAN2 + r')'),
        (LogTypes.SELF_NOTICE, _TS + r'\[notice\(' + _CHAN + r'\)\] ' + _MSG),
        (LogTypes.SELF_NICK, _TS + r'-!- You\'re now known as (?P<target>\S+)'),
        (LogTypes.NETSPLIT_START, _TS + r'-!- Netsplit ' + _SRV1 + r' <-> ' + _SRV2 + r' quits: (?P<nick_list>[^(]+)( \(\+(?P<count>\d+) more,\S+\))?'),
        (LogTypes.NETSPLIT_END, _TS + r'-!- Netsplit over, joins: (?P<nick_list>[^(]+)( \(\+(?P<count>\d+) more\))?'),
        # pseudo-type: produces no LogLine, only changes the date used for following lines
        ('DAY_CHANGED', r'--- Day changed (?P<date>.+)'),
    )

    # precompile
    TYPE_REGEXES = [(line_type, re.compile(expr)) for line_type, expr in TYPE_EXPRS]

    def _match_type(self, line):
        """
        Return (type, match) for the first entry of self.TYPE_REGEXES matching line, or (None, None).
        """

        for line_type, regex in self.TYPE_REGEXES:
            match = regex.match(line)

            if match:
                return line_type, match

        return None, None

    def _parse_datetime(self, groups, date):
        """
        Build the naive datetime for a matched line from its groups, or return None if it has no timestamp.
        """

        if 'datetime' in groups:
            # full datetime in the default asctime() format
            return datetime.datetime.strptime(groups['datetime'], '%a %b %d %H:%M:%S %Y')

        if 'timestamp' in groups:
            # per-line timestamp, in the configured format
            dt = datetime.datetime.strptime(groups['timestamp'], self.timestamp_fmt)

            # override the date component with the current date, if known
            if date:
                dt = dt.replace(year=date.year, month=date.month, day=date.day)

            return dt

        if 'date' in groups:
            # date-only datetime
            return datetime.datetime.strptime(groups['date'], '%a %b %d %Y')

        return None

    @staticmethod
    def _extract_data(groups):
        """
        Pick the data payload (message, mode, topic or space-separated nick list) out of the match groups.
        """

        for key in ('message', 'mode', 'topic'):
            if key in groups:
                return groups[key]

        if 'nick_list' in groups:
            # split into components
            nicks = groups['nick_list'].split(', ')

            # additional count of nicks not listed individually?
            if groups.get('count'):
                nicks.append('+%d' % int(groups['count']))

            return ' '.join(nicks)

        return None

    def _parse_line_info(self, channel, line, date, offset=None):
        """
        Parse a single line, returning None for an empty line, or a (date, LogLine-or-None) tuple.

        For a "Day changed" line the tuple is (new date, None), so that the caller can carry the new
        date over to the following lines; for every other line date is passed through unchanged.

        Raises LogParseError if the line matches no known type or carries no timestamp. Errors raised
        while converting the timestamp (e.g. ValueError from strptime) are not wrapped here.
        """

        # empty line
        if not line:
            return None

        line_type, match = self._match_type(line)

        if not match:
            raise LogParseError(line, offset, "Line did not match any type")

        # every named group of the matched expression is present as a key, unmatched ones are None
        groups = match.groupdict(None)

        dt = self._parse_datetime(groups, date)

        if dt is None:
            # no timestamp !?
            raise LogParseError(line, offset, "No timestamp")

        # now localize with timezone
        dtz = self.tz.localize(dt)

        # custom types?
        if line_type == 'DAY_CHANGED':
            # no event, just the new date
            return dtz, None

        # source: (nickname, username, hostname, flags), with the server in the hostname slot for netsplits
        if 'server1' in groups:
            source = (None, None, groups.get('server1'), None)
        else:
            source = (
                groups.get('nickname') or groups.get('nickname2'),
                groups.get('username'),
                groups.get('hostname'),
                groups.get('flags'),
            )

        # target
        if 'server2' in groups:
            target = groups.get('server2')
        else:
            target = groups.get('target')

        data = self._extract_data(groups)

        return date, LogLine(channel, offset, line_type, dtz, source, target, data)

    def parse_line(self, channel, line, date, offset=None):
        """
        Parse a single line, and return the resulting (date, LogLine) tuple, or None to ignore the line.

        None is returned for empty lines and for "Day changed" lines, which produce no LogLine. The
        returned date is the date argument, unchanged.

        Uses self.TYPE_REGEXES to do the matching. Raises LogParseError if the line matches no known
        type or carries no timestamp.
        """

        line_info = self._parse_line_info(channel, line, date, offset)

        # keep the "None means ignore" contract for lines without an event
        if line_info is None or line_info[1] is None:
            return None

        return line_info

    def parse_lines(self, channel, lines, date=None, starting_offset=None):
        """
        Parse the given lines, yielding a LogLine for each line that produces an event.

        Channel is passed through to each LogLine. date, if given, supplies the date component for lines
        that only carry a time-of-day timestamp; it is replaced whenever a "Day changed" line is seen.
        starting_offset is the offset of the first line; offsets of following lines count up from it,
        and if it is None all offsets are None.

        Raises LogParseError for lines that cannot be parsed; any other error raised while parsing a
        line is wrapped in a LogParseError.
        """

        for index, line in enumerate(lines):
            # offset? compare against None, so that a starting offset of 0 is honoured
            if starting_offset is not None:
                offset = starting_offset + index
            else:
                offset = None

            # try and parse
            try:
                line_info = self._parse_line_info(channel, line, date, offset)

            # passthrough LogParseError's
            except LogParseError:
                raise

            # wrap other errors as LogParseError
            except Exception as e:
                raise LogParseError(line, offset, "Parsing line failed: %s" % e)

            # nothing?
            if line_info is None:
                continue

            # unpack, update date
            date, log_line = line_info

            # day changes carry a new date but no event
            if log_line is not None:
                yield log_line