"""
    Parse log data into log_events
"""

import datetime

import log_line
from log_line import LogTypes


class LogParser(object):
    """
        Abstract interface for log parsers.

        Subclasses implement parse_lines() to turn raw text lines into log events.
    """

    def __init__(self, tz, timestamp_fmt="%H:%M:%S"):
        """
            Setup the parser to use the given format for line timestamps, which are of the given timezone.

            tz            - timezone of the line timestamps; IrssiParser calls tz.localize(naive_datetime) on it
                            (looks like the pytz API - confirm against callers)
            timestamp_fmt - strptime() format string used to parse the timestamp at the start of each line
        """

        self.tz = tz
        self.timestamp_fmt = timestamp_fmt

    def parse_lines(self, lines, date=None, starting_offset=None):
        """
            Parse the given (iterable) lines of unicode text into a LogEvent, no trailing newline.

            Offset is the starting offset, and may be None to not use it.

            Giving date lets the parser build full timestamps, otherwise, unless line timestamps have full date
            information, event timestamps will have a date component of 1900/1/1.

            Raises NotImplementedError; subclasses must override this method.
        """

        # previously a bare `abstract` name, which only failed by way of a NameError
        raise NotImplementedError("%s.parse_lines" % type(self).__name__)


class IrssiParser(LogParser):
    """
        A parser for irssi logfiles
    """

    def parse_lines(self, lines, date=None, starting_offset=None):
        """
            Parse the given lines, yielding LogEvents.

            lines           - iterable of text lines, no trailing newline
            date            - optional object with year/month/day attributes; when given, it overrides the date
                              component of every parsed timestamp
            starting_offset - offset of the first item in lines, or None to yield events with an offset of None

            Each event's offset is starting_offset plus the index of its line within lines; skipped status lines
            still count towards that index.

            Lines starting with '---' are skipped. Every other line is split at the first space into a timestamp and
            the remaining data; a line without a space, or with a timestamp that does not match timestamp_fmt,
            raises ValueError.
        """

        for index, line in enumerate(lines):
            # status lines
            if line.startswith('---'):
                # XXX: handle these
                continue

            # normal lines
            # XXX: only parse timestamps for now
            timestamp, data = line.split(' ', 1)

            # parse timestamp into naive datetime
            dt = datetime.datetime.strptime(timestamp, self.timestamp_fmt)

            # override date?
            if date is not None:
                dt = dt.replace(year=date.year, month=date.month, day=date.day)

            # now localize with timezone
            dtz = self.tz.localize(dt)

            # offset?
            # Decide on starting_offset, not on the loop index: index 0 is falsy, and adding the index to a
            # starting_offset of None raises TypeError.
            if starting_offset is not None:
                offset = starting_offset + index

            else:
                offset = None

            # yield raw events
            # NOTE(review): the fourth LogLine argument is passed as None; its meaning is defined in log_line
            yield log_line.LogLine(offset, LogTypes.RAW, dtz, None, data)