--- a/pvl/syslog/parser.py Fri Jan 04 23:47:53 2013 +0200
+++ b/pvl/syslog/parser.py Sat Jan 05 01:30:27 2013 +0200
@@ -3,6 +3,66 @@
import logging; log = logging.getLogger('pvl.syslog.parser')
# group 1: date and time, group 2: optional fractional seconds, group 3: optional timezone
RFC3339_RE = re.compile(r'(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})(\.\d+)?(Z|[+-]\d{2}:\d{2})?')
RFC3339_FMT = '%Y-%m-%dT%H:%M:%S'

def rfc3339 (timestamp, utc=False) :
    """
        RFC3339 timestamps as used in some syslog implementations.

            timestamp   - string starting with e.g. '2013-01-05T01:30:27.123+02:00'
            utc         - normalize to UTC using the timestamp's own offset, if it has one

        Returns None if the string does not start with an RFC3339 date-time.
        Fractional seconds are matched, but discarded.

        By default the timezone suffix is ignored, and the naive datetime is returned exactly
        as written in the timestamp, i.e. in whatever timezone the sender used.

        With utc=True, a numeric offset is subtracted so that the returned naive datetime is in
        UTC. A timestamp with a 'Z' suffix is already UTC; one without any timezone suffix cannot
        be converted and is returned as written.

        Raises ValueError if the date/time fields are out of range.
    """

    match = RFC3339_RE.match(timestamp)

    if not match :
        return None

    # parts
    dt = datetime.datetime.strptime(match.group(1), RFC3339_FMT)
    tz = match.group(3) # NOT group(2), which is the fractional seconds

    if not utc or not tz or tz == 'Z' :
        # as written: either no conversion was asked for, there is nothing to convert from,
        # or it is UTC already
        return dt

    # the regex guarantees a [+-]hh:mm offset here
    hours, minutes = tz[1:].split(':')
    td = datetime.timedelta(hours=int(hours), minutes=int(minutes))

    # local = UTC + offset  =>  UTC = local - offset
    if tz[0] == '-' :
        return dt + td

    return dt - td
+
RFC3164_RE = re.compile(r'\w{3} [0-9 ][0-9] \d{2}:\d{2}:\d{2}')
RFC3164_FMT = '%b %d %H:%M:%S'
RFC3164_PRE = '%Y ' # add missing year, assuming current

def rfc3164 (timestamp, year=None) :
    """
        Traditional BSD Syslog timestamps, e.g. 'Jan  5 01:30:27'.

            timestamp   - string starting with a 'Mmm dd hh:mm:ss' timestamp
            year        - year to assume for the timestamp; defaults to the current local year

        The format carries no year, so one has to be supplied; pass year explicitly when parsing
        logfiles that were not written during the current year.

        Returns a datetime assumed to be in localtime, or None if the string does not start with
        something shaped like a BSD syslog timestamp.

        Raises ValueError if the timestamp has the right shape but does not parse, e.g. an
        unknown month name, a day that does not exist in the assumed year, or trailing data.
    """

    if not RFC3164_RE.match(timestamp) :
        return None

    if year is None :
        prefix = time.strftime(RFC3164_PRE)
    else :
        # same shape as RFC3164_PRE: four-digit year followed by a space
        prefix = '%04d ' % year

    return datetime.datetime.strptime(prefix + timestamp, RFC3164_PRE + RFC3164_FMT)
+
class SyslogParser (object) :
"""
Parse syslog lines in text format, as used in logfiles/fifos.
@@ -45,21 +105,20 @@
'local6', # 22
'local7', # 23
)))
-
+
# default syslogd format
SYSLOG_RE = re.compile(
# the timestamp+hostname header
# XXX: hostname may be missing
# at least in Ubuntu 11.10 syslogd 'last message repeated 2 times'...
r'(?:<(?P<pri>\d+|(?P<facility>\w+)\.(?P<severity>\w+))>)?'
- + r'(?P<timestamp>\w{3} [0-9 ][0-9] \d{2}:\d{2}:\d{2}) (?P<hostname>\S+)? '
+ + r'(?P<timestamp>\w{3} [0-9 ][0-9] \d{2}:\d{2}:\d{2}|.+?) '
+ + r'(?P<hostname>\S+)? '
# the message, including possible tag/pid
+ r"(?P<message>(?P<tag>(?P<program>[^:\]]+)(?:\[(?P<pid>\d+)\])?: )?(?P<text>.*))\n?"
)
- TIMESTAMP_FMT = '%b %d %H:%M:%S'
-
def __init__ (self, raw=False) :
"""
Using given underlying line source.
@@ -92,14 +151,15 @@
"""
timestamp = match.group('timestamp')
-
- # add missing year; assume current
- timestamp = time.strftime('%Y') + ' ' + timestamp
-
- # k
- timestamp = datetime.datetime.strptime(timestamp, '%Y ' + self.TIMESTAMP_FMT)
- return timestamp
+ # timestamp, in various formats
+ try :
+ return rfc3164(timestamp) or rfc3339(timestamp)
+
+ except ValueError as ex:
+ # skip it
+ log.warning("timestamp: %s:", timestamp, exc_info=ex)
+ return None
def parse_prog (self, match) :
"""
@@ -152,7 +212,7 @@
pid = match.group('pid'),
msg = match.group('text'),
)
-
+
# facility/severity prefix?
if match.group('pri') :
item.update(self.parse_pri(match))