log_parser.py
author Tero Marttila <terom@fixme.fi>
Tue, 10 Feb 2009 22:59:52 +0200
changeset 85 0521cf830eb9
parent 83 a34e9f56ddda
child 86 645cf9c4441e
permissions -rw-r--r--
misc. template cleanup (hide RSS link, remove extra whitespace from calendar source)
"""
    Parse log data into log_events
"""

import datetime

import log_line
from log_line import LogTypes

class LogParser (object) :
    """
        Abstract interface for log parsers.

        Stores the timezone and timestamp format shared by concrete parsers; subclasses must override parse_lines.
    """

    def __init__ (self, tz, timestamp_fmt="%H:%M:%S") :
        """
            Setup the parser to use the given format for line timestamps, which are of the given timezone.

            tz              - timezone that line timestamps are in; IrssiParser calls tz.localize() on naive datetimes
            timestamp_fmt   - strptime-style format of the timestamp at the start of each line
        """

        self.tz = tz
        self.timestamp_fmt = timestamp_fmt

    def parse_lines (self, lines, date=None, starting_offset=None) :
        """
            Parse the given (iterable) lines of unicode text (no trailing newlines) into log events.

            Offset is the starting offset, and may be None to not use it.

            Giving date lets the parser build full timestamps, otherwise, unless line timestamps have full date
            information, event timestamps will have a date component of 1900/1/1.

            This base implementation always raises NotImplementedError; subclasses provide the actual parsing.
        """

        # explicit error instead of relying on a NameError from an undefined name
        raise NotImplementedError("%s must implement parse_lines" % self.__class__.__name__)


class IrssiParser (LogParser) :
    """
        A parser for irssi logfiles.

        Every line that is neither empty nor a status line is expected to start with a timestamp in
        self.timestamp_fmt, followed by a single space and the rest of the line.
    """

    def parse_lines (self, lines, date=None, starting_offset=None) :
        """
            Parse the given lines, yielding a LogLine for every line that parse_line does not ignore.

            If starting_offset is given (zero included), each line's offset is starting_offset plus the line's index
            within lines. If starting_offset is None, every event gets an offset of None.

            date is passed through to parse_line unchanged.

            Raises Exception, naming the offending line and its offset, if parsing any line fails.
        """

        for index, line in enumerate(lines) :
            # compare against None, so that a starting offset of zero still counts as an offset
            if starting_offset is not None :
                offset = starting_offset + index

            else :
                offset = None

            # try and parse
            try :
                event = self.parse_line(line, date, offset)

            except Exception as e :
                # offset may be None here, so format it with %s; %d would raise TypeError and hide the real error
                raise Exception("Parsing line failed: %r@%s: %s" % (line, offset, e))

            # yield unless the line was ignored
            if event is not None :
                yield event

    def parse_line (self, line, date, offset=None) :
        """
            Parse a single line, and return the resulting LogLine, or None, to ignore the line.

            Empty lines and status lines (those starting with '---') are ignored.

            If date is given, its year/month/day replace the date component of the parsed timestamp. The result is
            then made timezone-aware via self.tz.localize().

            Raises ValueError if the line has no space separating the timestamp from the data, or if the timestamp
            does not match self.timestamp_fmt.
        """

        # empty line
        if not line :
            return None

        # status lines
        if line.startswith('---') :
            # XXX: handle these
            return None

        # normal lines
        # XXX: only parse timestamps for now
        timestamp, data = line.split(' ', 1)

        # parse timestamp into naive datetime; without date info in the format this yields 1900/1/1
        dt = datetime.datetime.strptime(timestamp, self.timestamp_fmt)

        # override date?
        if date :
            dt = dt.replace(year=date.year, month=date.month, day=date.day)

        # now localize with timezone
        # NOTE(review): localize() looks like the pytz timezone API - confirm what callers pass as tz
        dtz = self.tz.localize(dt)

        # build raw event
        return log_line.LogLine(offset, LogTypes.RAW, dtz, None, data)