log_parser.py
author Tero Marttila <terom@fixme.fi>
Mon, 09 Feb 2009 11:05:53 +0200
changeset 64 cdb6403c2498
parent 50 f13cf27a360b
child 65 8b50694f841e
permissions -rw-r--r--
beginnings of a LogSearchIndex class
"""
    Parse log data into log_events
"""

import datetime

import log_line
from log_line import LogTypes

class LogParser (object) :
    """
        Abstract interface for log parsers.

        Holds the timezone and timestamp format shared by concrete parsers; subclasses implement parse_lines.
    """

    def __init__ (self, tz, timestamp_fmt="%H:%M:%S") :
        """
            Setup the parser to use the given format for line timestamps, which are of the given timezone.

            tz is stored as-is on self.tz, and timestamp_fmt (a strptime-style format string) on
            self.timestamp_fmt.
        """

        self.tz = tz
        self.timestamp_fmt = timestamp_fmt

    def parse_lines (self, lines, date=None, starting_offset=None) :
        """
            Parse the given (iterable) lines of unicode text into LogEvents, no trailing newline.

            Offset is the starting offset, and may be None to not use it.
            
            Giving date lets the parser build full timestamps, otherwise, unless line timestamps have full date
            information, event timestamps will have a date component of 1900/1/1.

            This base implementation is abstract and always raises NotImplementedError; subclasses must
            override it.
        """

        # fail explicitly, rather than relying on a NameError from an undefined placeholder name
        raise NotImplementedError("%s.parse_lines" % (type(self).__name__, ))


class IrssiParser (LogParser) :
    """
        A parser for irssi logfiles
    """

    def parse_lines (self, lines, date=None, starting_offset=None) :
        """
            Parse the given lines, yielding one RAW LogLine per normal line.

            Lines starting with '---' (status lines) are skipped for now, but they still count towards the line
            index that is used for computing offsets.

            Every other line is split at the first space into a timestamp and the remaining data. The timestamp
            is parsed using self.timestamp_fmt; if date is given, its year/month/day replace those of the parsed
            timestamp. The result is then passed through self.tz.localize().

            If starting_offset is not None, each yielded event's offset is starting_offset plus the index of the
            line within lines; otherwise the offset is None.

            Raises ValueError for a normal line that contains no space, or whose timestamp does not match
            self.timestamp_fmt.
        """
        
        for index, line in enumerate(lines) :
            # status lines
            if line.startswith('---') :
                # XXX: handle these
                continue
            
            # normal lines
            # XXX: only parse timestamps for now
            timestamp, data = line.split(' ', 1)
            
            # parse timestamp into naive datetime
            dt = datetime.datetime.strptime(timestamp, self.timestamp_fmt)
            
            # override date?
            if date :
                dt = dt.replace(year=date.year, month=date.month, day=date.day)
            
            # now localize with timezone
            dtz = self.tz.localize(dt)

            # offset? this depends on whether the caller gave a starting offset, not on the line index;
            # compare against None so that a starting_offset of 0 is still honoured
            if starting_offset is not None :
                offset = starting_offset + index

            else :
                offset = None

            # yield raw events
            yield log_line.LogLine(offset, LogTypes.RAW, dtz, None, data)