"""
    Parse log data into log events (log_line.LogLine objects)
"""

import datetime

import log_line
from log_line import LogTypes


class LogParser(object):
    """
        Abstract interface for log parsers.

        Subclasses are expected to override parse_lines().
    """

    def __init__(self, tz, timestamp_fmt="%H:%M:%S"):
        """
            Setup the parser to use the given format for line timestamps, which are of the given timezone.

            tz            - timezone object; parsers call tz.localize(naive_datetime) on it
                            (the pytz-style interface)
            timestamp_fmt - strptime() format of the timestamp at the start of each line
        """

        self.tz = tz
        self.timestamp_fmt = timestamp_fmt

    def parse_lines(self, lines, date=None):
        """
            Parse the given (iterable) lines of unicode text, given without trailing newlines, into log events.

            Giving date lets the parser build full timestamps, otherwise, unless line timestamps have full date
            information, event timestamps will have a date component of 1900/1/1.

            Raises NotImplementedError here; concrete parsers must override this method.
        """

        # explicit "abstract method" error instead of relying on an undefined name
        raise NotImplementedError("%s must implement parse_lines()" % type(self).__name__)


class IrssiParser(LogParser):
    """
        A parser for irssi logfiles
    """

    def parse_lines(self, lines, date=None):
        """
            Parse the given lines, yielding one log_line.LogLine of type LogTypes.RAW per normal line.

            Lines starting with '---' (status lines) and empty lines are skipped.

            If date is given, its year/month/day replace the date component of each parsed timestamp
            before the timestamp is localized with self.tz.

            Raises ValueError (from strptime) if a line's leading token does not match self.timestamp_fmt.
        """

        for line in lines:
            # nothing to parse on an empty line
            if not line:
                continue

            # status lines
            if line.startswith('---'):
                # XXX: handle these
                continue

            # normal lines
            # XXX: only parse timestamps for now
            # partition() never fails to unpack; a line without a space leaves data empty and lets
            # strptime() report a malformed timestamp with a descriptive error
            timestamp, _, data = line.partition(' ')

            # parse timestamp into naive datetime
            dt = datetime.datetime.strptime(timestamp, self.timestamp_fmt)

            # override date?
            if date is not None:
                dt = dt.replace(year=date.year, month=date.month, day=date.day)

            # now localize with timezone
            dtz = self.tz.localize(dt)

            # yield raw events
            yield log_line.LogLine(LogTypes.RAW, dtz, None, data)