# fixbot/logwatch/message.py
# author Tero Marttila <terom@fixme.fi>
# Fri, 05 Feb 2010 22:56:14 +0200
# changeset 65 1de3c0ca2baf
# parent 53 21ab25ffa1e8
# permissions -rw-r--r--
# improved SyslogMessage.match_program
"""
    Log messages (e.g. syslog)
"""

# stdlib syslog, for facilities and levels
import syslog
import time, datetime
import re
import os.path

# Maps each syslog severity level constant to its short textual name,
# listed from most severe (emerg) to least severe (debug).
PRIORITY_NAMES = dict([
    (syslog.LOG_EMERG,   'emerg'),
    (syslog.LOG_ALERT,   'alert'),
    (syslog.LOG_CRIT,    'crit'),
    (syslog.LOG_ERR,     'err'),
    (syslog.LOG_WARNING, 'warning'),
    (syslog.LOG_NOTICE,  'notice'),
    (syslog.LOG_INFO,    'info'),
    (syslog.LOG_DEBUG,   'debug'),
])

# Maps the *unshifted* syslog facility code (0..23, i.e. `pri // 8`) to its
# textual name.
#
# The facility constants exported by the stdlib syslog module are already
# shifted left by three bits so that they can be OR'ed with a level (for
# example syslog.LOG_USER == 8, not 1).  They are shifted back here so the
# keys line up with the plain facility numbers used for the codes that have
# no portable stdlib constant (5, 10..15).
FACILITY_NAMES = {
    syslog.LOG_KERN >> 3:   'kern',     # 0
    syslog.LOG_USER >> 3:   'user',     # 1
    syslog.LOG_MAIL >> 3:   'mail',     # 2
    syslog.LOG_DAEMON >> 3: 'daemon',   # 3
    syslog.LOG_AUTH >> 3:   'auth',     # 4
    5:                      'syslog',   # 5
    syslog.LOG_LPR >> 3:    'lpr',      # 6
    syslog.LOG_NEWS >> 3:   'news',     # 7
    syslog.LOG_UUCP >> 3:   'uucp',     # 8
    syslog.LOG_CRON >> 3:   'cron',     # 9
    10:                     'authpriv', # 10
    11:                     'ftp',      # 11
    12:                     'ntp',      # 12
    13:                     'audit',    # 13
    14:                     'alert',    # 14
    15:                     'clock',    # 15
    syslog.LOG_LOCAL0 >> 3: 'local0',   # 16
    syslog.LOG_LOCAL1 >> 3: 'local1',   # 17
    syslog.LOG_LOCAL2 >> 3: 'local2',   # 18
    syslog.LOG_LOCAL3 >> 3: 'local3',   # 19
    syslog.LOG_LOCAL4 >> 3: 'local4',   # 20
    syslog.LOG_LOCAL5 >> 3: 'local5',   # 21
    syslog.LOG_LOCAL6 >> 3: 'local6',   # 22
    syslog.LOG_LOCAL7 >> 3: 'local7',   # 23
}

class SyslogMessage(object):
    """
        A message from syslog with the following fields:

            pri             - (optional) raw integer priority field
            priority        - (optional) message priority as text
            facility        - (optional) message facility as text
            timestamp       - message timestamp as a datetime
            hostname        - originating hostname
            tag             - (optional) message tag including process name and id
            program         - (optional) process name part of message tag
            pid             - (optional) process ID part of message tag
            text            - the log message following the tag
            message         - the full log message including the tag
            raw             - the full syslog-format message

        Optional fields are None when the corresponding part is missing from the line.
    """

    # the regular expression used to parse the lines
    LINE_RE = re.compile(
            # the priority field as in raw syslog messages
            r"(?:<(?P<pri>\d+)>)?"

            # the timestamp+hostname header
        +   r"(?P<timestamp>\w{3} [0-9 ]\d \d{2}:\d{2}:\d{2}) (?P<hostname>\S+)"

            # the message, including possible tag/pid
        +   r" (?P<message>(?P<tag>(?P<program>[^:\]]+)(?:\[(?P<pid>\d+)\])?: )?(?P<text>.*))\n?"
    )

    # strptime/strftime format of timestamp (note: carries no year)
    TIMESTAMP_FMT = "%b %d %H:%M:%S"

    def __init__(self, line):
        """
            Construct this message from the given line.

            Raises ValueError if the line does not look like a syslog message, or if its timestamp cannot be parsed.
        """

        # apply regexp (anchored at the start of the line only)
        match = self.LINE_RE.match(line)

        if not match:
            raise ValueError("Invalid syslog data format")

        # unpack the various portions
        self._parse_pri(match)
        self._parse_timestamp(match)
        self._parse_hostname(match)
        self._parse_message(match)

        # the raw line as matched
        self.raw = match.group(0)

    def _parse_pri(self, match):
        """
            Parse the priority/facility from the given match object.

            Sets self.pri (int or None) and self.priority/self.facility (names, or None if the line had no <pri>).
        """

        raw_pri = match.group('pri')

        # raw integer
        self.pri = int(raw_pri) if raw_pri else None

        # compare against None explicitly: a pri of 0 (facility 0, level 0) is a valid value, not a missing one
        if self.pri is None:
            self.priority = self.facility = None
            return

        # unpack: low three bits are the level, the rest is the facility
        priority = self.pri % 8
        facility = self.pri // 8

        # translate to names, falling back to the number as text for unknown codes
        self.priority = PRIORITY_NAMES.get(priority, str(priority))
        self.facility = FACILITY_NAMES.get(facility, str(facility))

    def _parse_timestamp(self, match):
        """
            Parse the timestamp field into a datetime.datetime from the given match object.

            Raises ValueError if the timestamp does not parse using TIMESTAMP_FMT.
        """

        timestamp = match.group('timestamp')

        try:
            ts = time.strptime(timestamp, self.TIMESTAMP_FMT)

        except ValueError as ex:
            raise ValueError("Invalid timestamp: %s: %s" % (timestamp, ex))

        # fix year - strptime default is 1900 when the format has no year.
        # NOTE: the current local year is only a guess; a line logged in late December but parsed in January ends
        # up dated in the new year.
        year = ts.tm_year if ts.tm_year != 1900 else time.localtime().tm_year

        # year, then month, day, hour, minute, second
        self.timestamp = datetime.datetime(year, *ts[1:6])

    def _parse_hostname(self, match):
        """
            Parse the hostname from the given match object
        """

        # nothing much needed..
        self.hostname = match.group('hostname')

    def _parse_message(self, match):
        """
            Parse the message with tag from the given match object.

            tag, program and pid are None when the message does not start with a "program[pid]: " style tag.
        """

        pid = match.group('pid')

        self.message = match.group('message')
        self.tag = match.group('tag')
        self.program = match.group('program')
        self.pid = int(pid) if pid else None
        self.text = match.group('text')

    def properties(self):
        """
            Return a dict containing the attributes specified for this message
        """

        # XXX: ugh... doesn't do @property
        return {
            'facility':     self.facility,
            'priority':     self.priority,
            'timestamp':    self.timestamp,
            'hostname':     self.hostname,
            'tag':          self.tag,
            'program':      self.program,
            'pid':          self.pid,
            'text':         self.text,
            'message':      self.message,
        }

    @property
    def normalized_program(self):
        """
            Returns a normalized version of the program name:
                - lowercase
                - no path components

            Returns None if there is no program tag
        """

        if self.program is None:
            return None

        return os.path.basename(self.program).lower()

    def match_program(self, program):
        """
            If the given value is False, checks that this message does *not* have a program tag.

            Otherwise, compares the lowercased given value against this message's normalized program name
            (lowercased basename of the tag's program portion). Only the message's side has its path stripped.
        """

        if program is False:
            # only match if no tag
            return self.program is None

        if self.program is None:
            # no program to match against...
            return False

        # match against normalized name
        return self.normalized_program == program.lower()

    def match(self, regexp=None, program=None):
        """
            Evaluate a match against this message using the given criteria

                regexp          - compiled regexp, applied with .match() against the message's text
                program         - match program component of tag against given value, may also be False

            Returns False if any given criteria fails, otherwise a dict of the regexp's named groups.

            NOTE: the dict is empty when no regexp was given or the regexp has no named groups, and an empty dict is
            falsy - test the result with `is False` / `is not False` rather than plain truthiness.
        """

        if program is not None and not self.match_program(program):
            # program did not match
            return False

        params = {}

        if regexp is not None:
            # apply regexp
            found = regexp.match(self.text)

            if not found:
                # no match
                return False

            # good, collect the named groups
            params.update(found.groupdict())

        # we appear to have a match
        return params

    def __str__(self):
        """
            Format to default format: timestamp, hostname and the full message (without any <pri> prefix)
        """

        return "%s %s %s" % (self.timestamp.strftime(self.TIMESTAMP_FMT), self.hostname, self.message)

    def __repr__(self):
        return "pri=%s:%s, timestamp=%s, hostname=%s, tag=%s[%s], text=%r" % (
            self.facility, self.priority, self.timestamp.isoformat(), self.hostname, self.program, self.pid, self.text
        )