pvl/syslog/parser.py
changeset 69 9da998198936
parent 67 3324ed10c42f
child 70 c8ec745a2aaa
--- a/pvl/syslog/parser.py	Fri Jan 04 23:47:31 2013 +0200
+++ b/pvl/syslog/parser.py	Fri Jan 04 23:47:53 2013 +0200
@@ -7,13 +7,52 @@
     """
         Parse syslog lines in text format, as used in logfiles/fifos.
     """
-    
+
+    SEVERITIES = dict(enumerate((   # severity code (pri % 8) -> name
+        'emerg',    # 0
+        'alert',    # 1
+        'crit',     # 2
+        'err',      # 3
+        'warning',  # 4
+        'notice',   # 5
+        'info',     # 6
+        'debug',    # 7
+    )))
+
+    FACILITIES = dict(enumerate((   # facility code (pri // 8) -> name
+        'kern',     # 0
+        'user',     # 1
+        'mail',     # 2
+        'daemon',   # 3
+        'auth',     # 4
+        'syslog',   # 5
+        'lpr',      # 6
+        'news',     # 7
+        'uucp',     # 8
+        'cron',     # 9
+        'authpriv', # 10
+        'ftp',      # 11
+        'ntp',      # 12
+        'audit',    # 13
+        'alert',    # 14
+        'clock',    # 15
+        'local0',   # 16
+        'local1',   # 17
+        'local2',   # 18
+        'local3',   # 19
+        'local4',   # 20
+        'local5',   # 21
+        'local6',   # 22
+        'local7',   # 23
+    )))
+   
     # default syslogd format
     SYSLOG_RE = re.compile(
         # the timestamp+hostname header
         # XXX:  hostname may be missing
         #       at least in Ubuntu 11.10 syslogd 'last message repeated 2 times'...
-            r"(?P<timestamp>\w{3} [0-9 ]\d \d{2}:\d{2}:\d{2}) (?P<hostname>\S+)? "
+            r'(?:<(?P<pri>\d+|(?P<facility>\w+)\.(?P<severity>\w+))>)?'
+        +   r'(?P<timestamp>\w{3} [0-9 ][0-9] \d{2}:\d{2}:\d{2}) (?P<hostname>\S+)? '
 
         # the message, including possible tag/pid
         +   r"(?P<message>(?P<tag>(?P<program>[^:\]]+)(?:\[(?P<pid>\d+)\])?: )?(?P<text>.*))\n?"
@@ -28,6 +67,25 @@
 
         self.raw = raw
 
+    def parse_pri (self, match) :
+        """
+            Parse the optional <pri> prefix of a matched line into a dict of pri, severity and facility.
+        """
+
+        pri = match.group('pri')            # whole prefix body: digits, or 'facility.severity'
+        facility = match.group('facility')  # only set for the symbolic form
+        severity = match.group('severity')  # only set for the symbolic form
+        # NOTE: pri is assumed to be non-None; the caller in this change only calls us when the group matched
+        if pri.isdigit() :
+            pri = int(pri)
+            facility, severity = divmod(pri, 8)  # numeric pri is facility * 8 + severity
+        # int codes are mapped to names; symbolic names (and unknown codes) pass through .get() unchanged
+        return dict(
+            pri         = pri,
+            severity    = self.SEVERITIES.get(severity, severity),
+            facility    = self.FACILITIES.get(facility, facility)
+        )
+
     def parse_timestamp (self, match) :
         """
             Parse timstamp from line into datetime.
@@ -87,13 +145,19 @@
                 return
 
             # parse
-            return dict(
+            item = dict(
                 timestamp   = self.parse_timestamp(match),
                 host        = match.group('hostname'),
                 prog        = self.parse_prog(match),
                 pid         = match.group('pid'),
                 msg         = match.group('text'),
             )
+            
+            # facility/severity prefix?
+            if match.group('pri') :
+                item.update(self.parse_pri(match))
+
+            return item
     
     def process (self, lines) :
         """