pvl/syslog/parser.py
author Tero Marttila <terom@fixme.fi>
Thu, 03 Jan 2013 00:56:28 +0200
changeset 44 977442ccb72d
parent 43 9d13b101beab
child 67 3324ed10c42f
permissions -rw-r--r--
pvl.syslog: split out SyslogFilter
import datetime, time
import re

import logging; log = logging.getLogger('pvl.syslog.parser')

class SyslogParser (object) :
    """
        Parse syslog lines in text format, as used in logfiles/fifos.

        Every parsed line is returned as a dict with the keys `timestamp`, `host`, `prog`, `pid` and `msg`.
    """
    
    # default syslogd format
    SYSLOG_RE = re.compile(
        # the timestamp+hostname header
            r"(?P<timestamp>\w{3} [0-9 ]\d \d{2}:\d{2}:\d{2}) (?P<hostname>\S+) "

        # the message, including possible tag/pid
        +   r"(?P<message>(?P<tag>(?P<program>[^:\]]+)(?:\[(?P<pid>\d+)\])?: )?(?P<text>.*))\n?"
    )

    # strptime() format matching the `timestamp` group above; note that it carries no year
    TIMESTAMP_FMT = '%b %d %H:%M:%S'

    def __init__ (self, raw=False) :
        """
            raw     - if true, lines are not parsed as syslog messages at all; the whole (stripped) line
                      is used as the message text, and the timestamp is the time of parsing.
        """

        self.raw = raw

    def parse_timestamp (self, match) :
        """
            Parse timestamp from the matched line into a datetime.

            The syslog header does not include a year, so the current year is assumed.
            NOTE: this means that lines written in a previous year are stamped with the current year.

            Raises ValueError if the header matched SYSLOG_RE, but is not a valid date/time.
        """

        timestamp = match.group('timestamp')
        
        # add missing year; assume current
        timestamp = time.strftime('%Y') + ' ' + timestamp
        
        # parse with the matching year-prefixed format
        return datetime.datetime.strptime(timestamp, '%Y ' + self.TIMESTAMP_FMT)

    def parse_prog (self, match) :
        """
            Parse prog from the matched line.

            Returns the program name from the message tag, or None if the message has no tag.
        """

        # the group is None when the optional tag did not match
        return match.group('program') or None

    def parse (self, line) :
        """
            Parse given input line into a message dict.

            Returns a dict with the following keys:
                timestamp   - datetime of the message (time of parsing in raw mode)
                host        - hostname from the header, None in raw mode
                prog        - program name from the tag, or None
                pid         - pid from the tag as a string of digits, or None
                msg         - message text following the tag

            Returns None (after logging a warning) if the line cannot be parsed.
        """

        # ignore whitespace
        line = line.strip()

        # debug
        log.debug("%s", line)

        if self.raw :
            # no parsing; from defaults
            return dict(
                timestamp   = datetime.datetime.now(), # XXX: None?
                host        = None,
                prog        = None,
                pid         = None,
                msg         = line,
            )

        # parse
        match = self.SYSLOG_RE.match(line)

        if not match :
            log.warning("Unparseable syslog message: %r", line)
            return None

        try :
            timestamp = self.parse_timestamp(match)

        except ValueError as error :
            # the header looks like a timestamp, but is not a valid date; treat it like any other
            # unparseable line, rather than aborting the whole stream of lines in process()
            log.warning("Unparseable syslog timestamp: %r: %s", line, error)
            return None

        return dict(
            timestamp   = timestamp,
            host        = match.group('hostname'),
            prog        = self.parse_prog(match),
            pid         = match.group('pid'),
            msg         = match.group('text'),
        )
    
    def process (self, lines) :
        """
            Yield message dicts from given series of lines.

            Lines that cannot be parsed are skipped.
        """

        for line in lines :
            item = self.parse(line)

            if item :
                yield item

    # a parser instance can be called directly on a series of lines
    __call__ = process