pvl/syslog/parser.py
changeset 44 977442ccb72d
parent 43 9d13b101beab
child 67 3324ed10c42f
equal deleted inserted replaced
43:9d13b101beab 44:977442ccb72d
     1 """
       
     2     Parse syslog lines in text format.
       
     3 """
       
     4 
       
     5 import datetime, time
     1 import datetime, time
     6 import re
     2 import re
     7 
     3 
     8 import logging; log = logging.getLogger('pvl.syslog.parser')
     4 import logging; log = logging.getLogger('pvl.syslog.parser')
     9 
     5 
    10 class SyslogParser (object) :
     6 class SyslogParser (object) :
    11     """
     7     """
    12         A source of syslog items.
     8         Parse syslog lines in text format, as used in logfiles/fifos.
    13     """
     9     """
    14     
    10     
    15     # default syslogd format
    11     # default syslogd format
    16     SYSLOG_RE = re.compile(
    12     SYSLOG_RE = re.compile(
    17         # the timestamp+hostname header
    13         # the timestamp+hostname header
    21         +   r"(?P<message>(?P<tag>(?P<program>[^:\]]+)(?:\[(?P<pid>\d+)\])?: )?(?P<text>.*))\n?"
    17         +   r"(?P<message>(?P<tag>(?P<program>[^:\]]+)(?:\[(?P<pid>\d+)\])?: )?(?P<text>.*))\n?"
    22     )
    18     )
    23 
    19 
    24     TIMESTAMP_FMT = '%b %d %H:%M:%S'
    20     TIMESTAMP_FMT = '%b %d %H:%M:%S'
    25 
    21 
    26     def __init__ (self, raw=False, prog=None) :
    22     def __init__ (self, raw=False) :
    27         """
    23         """
    28             Using given underlying line source.
    24             Using given underlying line source.
    29         """
    25         """
    30 
    26 
    31         self.raw = raw
    27         self.raw = raw
    32         self.prog = prog
       
    33 
    28 
    34     def parse_timestamp (self, match) :
    29     def parse_timestamp (self, match) :
    35         """
    30         """
    36             Parse timestamp from line into datetime.
    31             Parse timestamp from line into datetime.
    37         """
    32         """
    51             Parse prog from line.
    46             Parse prog from line.
    52         """
    47         """
    53 
    48 
    54         prog = match.group('program')
    49         prog = match.group('program')
    55 
    50 
    56         if not prog :
    51         if prog :
       
    52             return prog
       
    53         else :
    57             # no tag
    54             # no tag
    58             return None
    55             return None
    59         
       
    60         # normalize
       
    61         prog = prog.lower()
       
    62 
       
    63         if prog.startswith('/') :
       
    64             # base
       
    65             prog = prog.split('/')[-1]
       
    66 
       
    67         return prog
       
    68 
    56 
    69     def parse (self, line) :
    57     def parse (self, line) :
    70         """
    58         """
    71             Parse given input line into SyslogMessage.
    59             Parse given input line into SyslogMessage.
    72         """
    60         """
    81         if self.raw :
    69         if self.raw :
    82             # from defaults
    70             # from defaults
    83             return dict(
    71             return dict(
    84                 timestamp   = datetime.datetime.now(), # XXX: None?
    72                 timestamp   = datetime.datetime.now(), # XXX: None?
    85                 host        = None,
    73                 host        = None,
    86                 prog        = self.prog,
    74                 prog        = None,
    87                 pid         = None,
    75                 pid         = None,
    88                 msg         = line,
    76                 msg         = line,
    89             )
    77             )
    90 
    78 
    91         else :
    79         else :
   103                 prog        = self.parse_prog(match),
    91                 prog        = self.parse_prog(match),
   104                 pid         = match.group('pid'),
    92                 pid         = match.group('pid'),
   105                 msg         = match.group('text'),
    93                 msg         = match.group('text'),
   106             )
    94             )
   107     
    95     
   108     def match_prog (self, prog) :
       
   109         """
       
   110             Match given prog?
       
   111         """
       
   112         
       
   113         if not prog :
       
   114             # never matches non-tagged lines
       
   115             return False
       
   116 
       
   117         elif self.prog.endswith('*') :
       
   118             # prefix match
       
   119             return prog.startswith(self.prog[:-1])
       
   120         else :
       
   121             return prog == self.prog
       
   122 
       
   123 
       
   124     def filter (self, line, item) :
       
   125         """
       
   126             Filter given item?
       
   127         """
       
   128 
       
   129         if not item :
       
   130             log.debug("empty: %r", line)
       
   131 
       
   132         elif self.prog and not self.match_prog(item['prog']) :
       
   133             log.debug("prog: %r", item)
       
   134 
       
   135         else :
       
   136             # ok
       
   137             return True
       
   138 
       
   139     def process (self, lines) :
    96     def process (self, lines) :
   140         """
    97         """
   141             Yield SyslogMessages from given series of lines.
    98             Yield SyslogMessages from given series of lines.
   142         """
    99         """
   143 
   100 
   144         for line in lines :
   101         for line in lines :
   145             item = self.parse(line)
   102             item = self.parse(line)
   146             
   103 
   147             # filter?
   104             if item :
   148             if self.filter(line, item) :
       
   149                 yield item
   105                 yield item
   150 
   106 
   151     __call__ = process
   107     __call__ = process
   152 
   108