pvl/syslog/parser.py
author Tero Marttila <terom@fixme.fi>
Thu, 03 Jan 2013 00:56:28 +0200
changeset 44 977442ccb72d
parent 43 9d13b101beab
child 67 3324ed10c42f
permissions -rw-r--r--
pvl.syslog: split out SyslogFilter
31
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
     1
import datetime, time
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
     2
import re
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
     3
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
     4
import logging; log = logging.getLogger('pvl.syslog.parser')
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
     5
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
     6
class SyslogParser (object) :
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
     7
    """
44
977442ccb72d pvl.syslog: split out SyslogFilter
Tero Marttila <terom@fixme.fi>
parents: 43
diff changeset
     8
        Parse syslog lines in text format, as used in logfiles/fifos.
31
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
     9
    """
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    10
    
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    11
    # default syslogd format
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    12
    SYSLOG_RE = re.compile(
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    13
        # the timestamp+hostname header
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    14
            r"(?P<timestamp>\w{3} [0-9 ]\d \d{2}:\d{2}:\d{2}) (?P<hostname>\S+) "
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    15
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    16
        # the message, including possible tag/pid
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    17
        +   r"(?P<message>(?P<tag>(?P<program>[^:\]]+)(?:\[(?P<pid>\d+)\])?: )?(?P<text>.*))\n?"
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    18
    )
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    19
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    20
    TIMESTAMP_FMT = '%b %d %H:%M:%S'
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    21
44
977442ccb72d pvl.syslog: split out SyslogFilter
Tero Marttila <terom@fixme.fi>
parents: 43
diff changeset
    22
    def __init__ (self, raw=False) :
31
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    23
        """
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    24
            Using given underlying line source.
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    25
        """
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    26
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    27
        self.raw = raw
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    28
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    29
    def parse_timestamp (self, match) :
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    30
        """
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    31
            Parse timstamp from line into datetime.
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    32
        """
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    33
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    34
        timestamp = match.group('timestamp')
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    35
        
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    36
        # add missing year; assume current
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    37
        timestamp = time.strftime('%Y') + ' ' + timestamp
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    38
        
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    39
        # k
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    40
        timestamp = datetime.datetime.strptime(timestamp, '%Y ' + self.TIMESTAMP_FMT)
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    41
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    42
        return timestamp
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    43
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    44
    def parse_prog (self, match) :
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    45
        """
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    46
            Parse prog from line.
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    47
        """
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    48
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    49
        prog = match.group('program')
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    50
44
977442ccb72d pvl.syslog: split out SyslogFilter
Tero Marttila <terom@fixme.fi>
parents: 43
diff changeset
    51
        if prog :
977442ccb72d pvl.syslog: split out SyslogFilter
Tero Marttila <terom@fixme.fi>
parents: 43
diff changeset
    52
            return prog
977442ccb72d pvl.syslog: split out SyslogFilter
Tero Marttila <terom@fixme.fi>
parents: 43
diff changeset
    53
        else :
31
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    54
            # no tag
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    55
            return None
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    56
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    57
    def parse (self, line) :
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    58
        """
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    59
            Parse given input line into SyslogMessage.
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    60
        """
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    61
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    62
        # ignore whitespace
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    63
        line = line.strip()
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    64
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    65
        # debug
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    66
        log.debug("%s", line)
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    67
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    68
        # timestamp?
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    69
        if self.raw :
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    70
            # from defaults
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    71
            return dict(
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    72
                timestamp   = datetime.datetime.now(), # XXX: None?
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    73
                host        = None,
44
977442ccb72d pvl.syslog: split out SyslogFilter
Tero Marttila <terom@fixme.fi>
parents: 43
diff changeset
    74
                prog        = None,
31
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    75
                pid         = None,
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    76
                msg         = line,
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    77
            )
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    78
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    79
        else :
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    80
            # parse
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    81
            match = self.SYSLOG_RE.match(line)
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    82
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    83
            if not match :
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    84
                log.warn("Unparseable syslog message: %r", line)
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    85
                return
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    86
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    87
            # parse
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    88
            return dict(
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    89
                timestamp   = self.parse_timestamp(match),
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    90
                host        = match.group('hostname'),
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    91
                prog        = self.parse_prog(match),
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    92
                pid         = match.group('pid'),
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    93
                msg         = match.group('text'),
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    94
            )
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    95
    
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    96
    def process (self, lines) :
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    97
        """
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    98
            Yield SyslogMessages from given series of lines.
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
    99
        """
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
   100
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
   101
        for line in lines :
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
   102
            item = self.parse(line)
44
977442ccb72d pvl.syslog: split out SyslogFilter
Tero Marttila <terom@fixme.fi>
parents: 43
diff changeset
   103
977442ccb72d pvl.syslog: split out SyslogFilter
Tero Marttila <terom@fixme.fi>
parents: 43
diff changeset
   104
            if item :
31
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
   105
                yield item
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
   106
43
9d13b101beab pvl.syslog: implement pvl.syslog.args.apply -> SyslogSource as in pvl.verkko-dhcp
Tero Marttila <terom@fixme.fi>
parents: 31
diff changeset
   107
    __call__ = process
31
3e6d0feb115c pvl.syslog: import from pvl-collectd
Tero Marttila <terom@paivola.fi>
parents:
diff changeset
   108