"""
    Parse log data into log_events
"""

import datetime

import log_line
from log_line import LogTypes


class LogParser(object):
    """
        Abstract interface for log parsers.

        Subclasses implement parse_lines() to turn lines of text into log events.
    """

    def __init__(self, tz, timestamp_fmt="%H:%M:%S"):
        """
            Setup the parser to use the given format for line timestamps, which are of the given timezone.

            tz            - timezone of the timestamps in the log; the parsers in this module call
                            tz.localize(naive_datetime) on it (presumably a pytz timezone - confirm
                            against callers)
            timestamp_fmt - strptime() format of the timestamp at the start of each line
        """

        self.tz = tz
        self.timestamp_fmt = timestamp_fmt

    def parse_lines(self, lines, date=None, starting_offset=None):
        """
            Parse the given (iterable) lines of unicode text into LogEvents, no trailing newline.

            Offset is the starting offset, and may be None to not use it.

            Giving date lets the parser build full timestamps, otherwise, unless line timestamps have full date
            information, event timestamps will have a date component of 1900/1/1.

            Raises NotImplementedError here; subclasses must override this method.
        """

        raise NotImplementedError("parse_lines must be implemented by a LogParser subclass")


class IrssiParser(LogParser):
    """
        A parser for irssi logfiles
    """

    def parse_lines(self, lines, date=None, starting_offset=None):
        """
            Parse the given lines, yielding the objects returned by parse_line().

            lines           - iterable of lines, without trailing newlines
            date            - optional date used to override the date part of every line timestamp
            starting_offset - offset given to the first line, incremented by one for every following
                              line (ignored lines included); None gives every event an offset of None

            Lines for which parse_line() returns None are skipped.

            Raises Exception, naming the offending line and its offset, if parsing a line fails.
        """

        for index, line in enumerate(lines):
            # only None disables offsets - a starting offset of zero is a valid offset
            if starting_offset is not None:
                offset = starting_offset + index

            else:
                offset = None

            # try and parse
            try:
                event = self.parse_line(line, date, offset)

            except Exception as e:
                # offset may be None here, so it is formatted with %s rather than %d
                raise Exception("Parsing line failed: %r@%s: %s" % (line, offset, e))

            # yield unless None
            if event is not None:
                yield event

    def parse_line(self, line, date, offset=None):
        """
            Parse a single line, and return the resulting LogLine, or None, to ignore the line.

            line   - the line of text, without trailing newline
            date   - if given, its year/month/day replace the date part of the parsed timestamp
            offset - passed through to the LogLine as-is

            Empty lines and lines starting with '---' are ignored.

            Raises ValueError if the line contains no space to separate the timestamp from the data,
            or if the timestamp does not match self.timestamp_fmt.
        """

        # empty line
        if not line:
            return None

        # status lines
        if line.startswith('---'):
            # XXX: handle these
            return None

        # normal lines
        # XXX: only parse timestamps for now
        timestamp, data = line.split(' ', 1)

        # parse timestamp into naive datetime
        dt = datetime.datetime.strptime(timestamp, self.timestamp_fmt)

        # override date?
        if date:
            dt = dt.replace(year=date.year, month=date.month, day=date.day)

        # now localize with timezone
        dtz = self.tz.localize(dt)

        # build raw event
        return log_line.LogLine(offset, LogTypes.RAW, dtz, None, data)