pvl/syslog/parser.py
author Tero Marttila <terom@fixme.fi>
Fri, 04 Jan 2013 23:47:31 +0200
changeset 68 bea41de5cc98
parent 67 3324ed10c42f
child 69 9da998198936
permissions -rw-r--r--
pvl.syslog.filter: reject empty progs
31
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
     1
import datetime, time
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
     2
import re
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
     3
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
     4
import logging; log = logging.getLogger('pvl.syslog.parser')
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
     5
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
     6
class SyslogParser (object) :
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
     7
    """
44
977442ccb72d pvl.syslog: split out SyslogFilter
Tero Marttila <terom@fixme.fi>
parents: 43
diff changeset
     8
        Parse syslog lines in text format, as used in logfiles/fifos.
31
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
     9
    """
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    10
    
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    11
    # default syslogd format
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    12
    SYSLOG_RE = re.compile(
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    13
        # the timestamp+hostname header
67
3324ed10c42f pvl.syslog.parser: missing hostname in 'last message repeated ... times' messages
Tero Marttila <terom@fixme.fi>
parents: 44
diff changeset
    14
        # XXX:  hostname may be missing
3324ed10c42f pvl.syslog.parser: missing hostname in 'last message repeated ... times' messages
Tero Marttila <terom@fixme.fi>
parents: 44
diff changeset
    15
        #       at least in Ubuntu 11.10 syslogd 'last message repeated 2 times'...
3324ed10c42f pvl.syslog.parser: missing hostname in 'last message repeated ... times' messages
Tero Marttila <terom@fixme.fi>
parents: 44
diff changeset
    16
            r"(?P<timestamp>\w{3} [0-9 ]\d \d{2}:\d{2}:\d{2}) (?P<hostname>\S+)? "
31
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    17
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    18
        # the message, including possible tag/pid
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    19
        +   r"(?P<message>(?P<tag>(?P<program>[^:\]]+)(?:\[(?P<pid>\d+)\])?: )?(?P<text>.*))\n?"
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    20
    )
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    21
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    22
    TIMESTAMP_FMT = '%b %d %H:%M:%S'
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    23
44
977442ccb72d pvl.syslog: split out SyslogFilter
Tero Marttila <terom@fixme.fi>
parents: 43
diff changeset
    24
    def __init__ (self, raw=False) :
31
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    25
        """
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    26
            Using given underlying line source.
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    27
        """
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    28
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    29
        self.raw = raw
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    30
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    31
    def parse_timestamp (self, match) :
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    32
        """
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    33
            Parse timstamp from line into datetime.
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    34
        """
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    35
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    36
        timestamp = match.group('timestamp')
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    37
        
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    38
        # add missing year; assume current
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    39
        timestamp = time.strftime('%Y') + ' ' + timestamp
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    40
        
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    41
        # k
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    42
        timestamp = datetime.datetime.strptime(timestamp, '%Y ' + self.TIMESTAMP_FMT)
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    43
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    44
        return timestamp
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    45
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    46
    def parse_prog (self, match) :
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    47
        """
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    48
            Parse prog from line.
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    49
        """
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    50
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    51
        prog = match.group('program')
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    52
44
977442ccb72d pvl.syslog: split out SyslogFilter
Tero Marttila <terom@fixme.fi>
parents: 43
diff changeset
    53
        if prog :
977442ccb72d pvl.syslog: split out SyslogFilter
Tero Marttila <terom@fixme.fi>
parents: 43
diff changeset
    54
            return prog
977442ccb72d pvl.syslog: split out SyslogFilter
Tero Marttila <terom@fixme.fi>
parents: 43
diff changeset
    55
        else :
31
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    56
            # no tag
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    57
            return None
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    58
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    59
    def parse (self, line) :
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    60
        """
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    61
            Parse given input line into SyslogMessage.
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    62
        """
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    63
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    64
        # ignore whitespace
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    65
        line = line.strip()
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    66
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    67
        # debug
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    68
        log.debug("%s", line)
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    69
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    70
        # timestamp?
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    71
        if self.raw :
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    72
            # from defaults
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    73
            return dict(
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    74
                timestamp   = datetime.datetime.now(), # XXX: None?
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    75
                host        = None,
44
977442ccb72d pvl.syslog: split out SyslogFilter
Tero Marttila <terom@fixme.fi>
parents: 43
diff changeset
    76
                prog        = None,
31
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    77
                pid         = None,
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    78
                msg         = line,
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    79
            )
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    80
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    81
        else :
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    82
            # parse
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    83
            match = self.SYSLOG_RE.match(line)
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    84
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    85
            if not match :
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    86
                log.warn("Unparseable syslog message: %r", line)
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    87
                return
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    88
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    89
            # parse
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    90
            return dict(
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    91
                timestamp   = self.parse_timestamp(match),
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    92
                host        = match.group('hostname'),
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    93
                prog        = self.parse_prog(match),
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    94
                pid         = match.group('pid'),
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    95
                msg         = match.group('text'),
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    96
            )
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    97
    
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    98
    def process (self, lines) :
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    99
        """
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
   100
            Yield SyslogMessages from given series of lines.
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
   101
        """
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
   102
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
   103
        for line in lines :
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
   104
            item = self.parse(line)
44
977442ccb72d pvl.syslog: split out SyslogFilter
Tero Marttila <terom@fixme.fi>
parents: 43
diff changeset
   105
977442ccb72d pvl.syslog: split out SyslogFilter
Tero Marttila <terom@fixme.fi>
parents: 43
diff changeset
   106
            if item :
31
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
   107
                yield item
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
   108
43
9d13b101beab pvl.syslog: implement pvl.syslog.args.apply -> SyslogSource as in pvl.verkko-dhcp
Tero Marttila <terom@fixme.fi>
parents: 31
diff changeset
   109
    __call__ = process
31
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
   110