--- a/log_parser.py Tue Feb 10 22:59:52 2009 +0200
+++ b/log_parser.py Tue Feb 10 23:00:11 2009 +0200
@@ -2,10 +2,18 @@
Parse log data into log_events
"""
+import re
import datetime
-import log_line
-from log_line import LogTypes
+from log_line import LogTypes, LogLine
+
+class LogParseError (Exception) :
+ """
+ Parsing some line failed.
+
+ The exception message is formatted as "<repr of line>@<offset>: <message>".
+ """
+
+ def __init__ (self, offset, line, message) :
+ """
+ Build the exception message from the offset, the offending line and a description of the failure.
+
+ Note the argument order: offset comes first, then the line.
+ """
+ super(LogParseError, self).__init__("%r@%s: %s" % (line, offset, message))
class LogParser (object) :
"""
@@ -20,10 +28,12 @@
self.tz = tz
self.timestamp_fmt = timestamp_fmt
- def parse_lines (self, lines, date=None, starting_offset=None) :
+ def parse_lines (self, channel, lines, date=None, starting_offset=None) :
"""
Parse the given (iterable) lines of unicode text into a LogEvent, no trailing newline.
+ Channel is the LogChannel that these lines belong to.
+
Offset is the starting offset, and may be None to not use it.
Giving date lets the parser build full timestamps, otherwise, unless line timestamps have full date
@@ -32,13 +42,114 @@
abstract
-
class IrssiParser (LogParser) :
"""
A parser for irssi logfiles
"""
- def parse_lines (self, lines, date=None, starting_offset=None) :
+ # subexpression parts
+ # NOTE: most of these groups are lazy (.+? / .*?), so they only capture what is intended when
+ # they are followed by a literal delimiter or by an end-of-line anchor
+ _TS = r'(?P<timestamp>\S+)'
+ _NICK = r'(?P<nickname>.+?)'
+ _NICK2 = r'(?P<nickname2>.+?)'
+ _CHAN = r'(?P<channel>.+?)'
+ _USERHOST = r'(?P<username>.*?)@(?P<hostname>.*?)'
+ _MSG = r'(?P<message>.*)'
+
+ # regular expressions for matching lines, by type
+ # order matters: parse_line tries these in order and uses the first one that matches
+ # all fragments are raw strings, so that regex escapes like \[ are never treated as string escapes
+ TYPE_EXPRS = (
+ ( LogTypes.LOG_OPEN, r'--- Log opened (?P<datetime>.+)' ),
+ ( LogTypes.LOG_CLOSE, r'--- Log closed (?P<datetime>.+)' ),
+ ( LogTypes.MSG, _TS + r' <(?P<flags>.)' + _NICK + r'> ' + _MSG ),
+ ( LogTypes.NOTICE, _TS + r' -' + _NICK + r':' + _CHAN + r'- ' + _MSG ),
+ ( LogTypes.ACTION, _TS + r' \* ' + _NICK + r' ' + _MSG ),
+ # anchored with $, as the trailing lazy channel group would otherwise only capture a single character
+ ( LogTypes.JOIN, _TS + r' -!- ' + _NICK + r' \[' + _USERHOST + r'\] has joined ' + _CHAN + r'$' ),
+ # PART/KICK/QUIT are anchored with $, so that a message containing ']' is not cut off at the first ']'
+ ( LogTypes.PART, _TS + r' -!- ' + _NICK + r' \[' + _USERHOST + r'\] has left ' + _CHAN + r' \[(?P<message>.*?)\]$' ),
+ ( LogTypes.KICK, _TS + r' -!- ' + _NICK2 + r' was kicked from ' + _CHAN + r' by ' + _NICK + r' \[(?P<message>.*?)\]$' ),
+ ( LogTypes.MODE, _TS + r' -!- mode/' + _CHAN + r' \[(?P<mode>.+?)\] by (?P<nickname>\S+)' ),
+ ( LogTypes.NICK, _TS + r' -!- ' + _NICK + r' is now known as (?P<nickname2>\S+)' ),
+ ( LogTypes.QUIT, _TS + r' -!- ' + _NICK + r' \[' + _USERHOST + r'\] has quit \[(?P<message>.*?)\]$' ),
+ ( LogTypes.TOPIC, _TS + r' -!- ' + _NICK + r' changed the topic of ' + _CHAN + r' to: (?P<topic>.*)' ),
+
+ ( LogTypes.SELF_NOTICE, _TS + r' \[notice\(' + _CHAN + r'\)\] ' + _MSG ),
+ ( LogTypes.SELF_NICK, _TS + r' -!- You\'re now known as (?P<nickname2>\S+)' ),
+ )
+
+ # precompile
+ TYPE_REGEXES = [(type, re.compile(expr)) for type, expr in TYPE_EXPRS]
+
+ def parse_line (self, channel, line, date, offset=None) :
+ """
+ Parse a single line, and return the resulting LogLine, or None, to ignore the line.
+
+ Uses self.TYPE_REGEXES to do the matching; the expressions are tried in order, and the first one
+ that matches determines the type of the line.
+
+ channel and offset are passed through to the LogLine as-is. If date is given, it replaces the
+ year/month/day of a line timestamp parsed using self.timestamp_fmt; it is not applied to the full
+ datetime of "Log opened"/"Log closed" lines.
+
+ Returns None for an empty line.
+
+ Raises LogParseError if the line does not match any type, or if the match has no timestamp.
+ Errors from strptime (e.g. a timestamp that does not fit the format) are not caught here.
+ """
+
+ # empty line
+ if not line :
+ return
+
+ # look for match
+ # NOTE: the local name `type` shadows the builtin for the rest of this method
+ match = type = None
+
+ # test each type
+ for type, regex in self.TYPE_REGEXES :
+ # attempt to match
+ # regex.match() only anchors at the start of the line, not at the end
+ match = regex.match(line)
+
+ # found, break
+ if match :
+ break
+
+ # no match found?
+ if not match :
+ raise LogParseError(offset, line, "Line did not match any type")
+
+ # match groups
+ # only contains the groups named in the expression that matched
+ groups = match.groupdict(None)
+
+ # parse timestamp
+ if 'datetime' in groups :
+ # parse datetime using default asctime() format
+ dt = datetime.datetime.strptime(groups['datetime'], '%a %b %d %H:%M:%S %Y')
+
+ elif 'timestamp' in groups :
+ # parse timestamp into naive datetime
+ dt = datetime.datetime.strptime(groups['timestamp'], self.timestamp_fmt)
+
+ # override date?
+ if date :
+ dt = dt.replace(year=date.year, month=date.month, day=date.day)
+
+ else :
+ # no timestamp !?
+ raise LogParseError(offset, line, "No timestamp")
+
+ # now localize with timezone
+ # presumably self.tz is a pytz-style timezone providing localize() - confirm against the callers
+ dtz = self.tz.localize(dt)
+
+ # source
+ # (nickname, username, hostname, flags), with None for any part that the matched expression does not have
+ source = (groups.get('nickname'), groups.get('username'), groups.get('hostname'), groups.get('flags'))
+
+ # target
+ target = groups.get('nickname2')
+
+ # data
+ # the first of message, mode, topic that the matched expression defines, or None if it has none of them
+ if 'message' in groups :
+ data = groups['message']
+
+ elif 'mode' in groups :
+ data = groups['mode']
+
+ elif 'topic' in groups :
+ data = groups['topic']
+
+ else :
+ data = None
+
+ # build+return LogLine
+ return LogLine(channel, offset, type, dtz, source, target, data)
+
+ def parse_lines (self, channel, lines, date=None, starting_offset=None) :
"""
Parse the given lines, yielding LogEvents.
"""
@@ -53,45 +164,19 @@
# try and parse
try :
- line = self.parse_line(line, date, offset)
-
+ line = self.parse_line(channel, line, date, offset)
+
+ # passthrough LogParseError's
+ except LogParseError :
+ raise
+
+ # wrap other errors as LogParseError
except Exception, e :
- raise Exception("Parsing line failed: %r@%d: %s" % (line, offset, e))
+ # LogParseError takes (offset, line, message), in that order
+ raise LogParseError(offset, line, "Parsing line failed: %s" % e)
else :
# yield unless None
if line :
yield line
- def parse_line (self, line, date, offset=None) :
- """
- Parse a single line, and return the resulting LogLine, or None, to ignore the line
- """
-
- # empty line
- if not line :
- return
- # status lines
- elif line.startswith('---') :
- # XXX: handle these
- return
-
- # normal lines
- else :
- # XXX: only parse timestamps for now
- timestamp, data = line.split(' ', 1)
-
- # parse timestamp into naive datetime
- dt = datetime.datetime.strptime(timestamp, self.timestamp_fmt)
-
- # override date?
- if date :
- dt = dt.replace(year=date.year, month=date.month, day=date.day)
-
- # now localize with timezone
- dtz = self.tz.localize(dt)
-
- # build raw event
- return log_line.LogLine(offset, LogTypes.RAW, dtz, None, data)
-