import datetime, time
import re
import logging; log = logging.getLogger('pvl.syslog.parser')
# Matches "YYYY-MM-DDTHH:MM:SS" (group 1), optionally followed by fractional
# seconds (group 2) and a timezone designator (group 3: "Z" or "+HH:MM"/"-HH:MM").
RFC3339_RE = re.compile(r'(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})(\.\d+)?(Z|[+-]\d{2}:\d{2})?')
RFC3339_FMT = '%Y-%m-%dT%H:%M:%S'


def rfc3339(timestamp, utc=False):
    """
        RFC3339 timestamps as used in some syslog implementations.

        timestamp   - string starting with an RFC3339 date-time; anything after
                      the matched prefix is ignored
        utc         - if true, apply the timestamp's UTC offset (when it has one)
                      so that the result is expressed in UTC

        Returns a naive datetime, or None if the string does not look like an
        RFC3339 timestamp. Fractional seconds are matched but discarded.

        With the default utc=False the timezone designator is ignored and the
        wall-clock fields are returned exactly as written, i.e. in whatever
        timezone the sender used. With utc=True a timestamp carrying 'Z' or a
        numeric offset is normalized to UTC; a timestamp without any designator
        is still returned as written, since its timezone is unknown.

        Raises ValueError if the fields match the pattern but do not form a
        valid date/time (e.g. month 13).
    """

    match = RFC3339_RE.match(timestamp)

    if not match:
        return None

    dt = datetime.datetime.strptime(match.group(1), RFC3339_FMT)

    if not utc:
        return dt

    # group 3 is the timezone designator; group 2 is the fractional seconds
    tz = match.group(3)

    if not tz or tz == 'Z':
        # unknown timezone, or already UTC
        return dt

    hours, minutes = tz[1:].split(':')
    offset = datetime.timedelta(hours=int(hours), minutes=int(minutes))

    # local = UTC + offset, so UTC = local - offset
    if tz[0] == '-':
        return dt + offset

    return dt - offset
RFC3164_RE = re.compile(r'\w{3} [0-9 ][0-9] \d{2}:\d{2}:\d{2}')
RFC3164_FMT = '%b %d %H:%M:%S'
RFC3164_PRE = '%Y '  # add missing year, assuming current

# how far ahead of the reference time a timestamp may be before it is assumed
# to belong to the previous year
RFC3164_FUTURE_SLACK = datetime.timedelta(days=1)


def rfc3164(timestamp, now=None):
    """
        Traditional BSD Syslog timestamps ("Mmm dd hh:mm:ss").

        timestamp   - string starting with a BSD syslog timestamp
        now         - reference datetime used to supply the missing year;
                      defaults to datetime.datetime.now()

        Returns a naive datetime assumed to be in localtime, or None if the
        string does not look like a BSD syslog timestamp.

        The format carries no year, so the reference year is used. If that
        would place the timestamp more than RFC3164_FUTURE_SLACK ahead of the
        reference time (e.g. a "Dec 31" line read on Jan 1), the previous year
        is used instead.

        Raises ValueError if the string matches the pattern but cannot be
        parsed (unknown month name, invalid day, trailing data).
    """

    if not RFC3164_RE.match(timestamp):
        return None

    if now is None:
        now = datetime.datetime.now()

    dt = datetime.datetime.strptime(
        now.strftime(RFC3164_PRE) + timestamp,
        RFC3164_PRE + RFC3164_FMT,
    )

    if dt - now > RFC3164_FUTURE_SLACK:
        try:
            dt = dt.replace(year=dt.year - 1)
        except ValueError:
            # Feb 29 does not exist in the previous year; keep the parsed date
            pass

    return dt
class SyslogParser(object):
    """
        Parse syslog lines in text format, as used in logfiles/fifos.

        Each parsed line becomes a dict with the keys:
            timestamp, host, prog, pid, msg, pri, severity, facility
    """

    # numeric severity code -> name
    SEVERITIES = dict(enumerate((
        'emerg',
        'alert',
        'crit',
        'err',
        'warning',
        'notice',
        'info',
        'debug',
    )))

    # numeric facility code -> name
    FACILITIES = dict(enumerate((
        'kern',      # 0
        'user',      # 1
        'mail',      # 2
        'daemon',    # 3
        'auth',      # 4
        'syslog',    # 5
        'lpr',       # 6
        'news',      # 7
        'uucp',      # 8
        'cron',      # 9
        'authpriv',  # 10
        'ftp',       # 11
        'ntp',       # 12
        'audit',     # 13
        'alert',     # 14
        'clock',     # 15
        'local0',    # 16
        'local1',    # 17
        'local2',    # 18
        'local3',    # 19
        'local4',    # 20
        'local5',    # 21
        'local6',    # 22
        'local7',    # 23
    )))

    # default syslogd format
    SYSLOG_RE = re.compile(
        # optional "<pri>" prefix: either a number or "facility.severity"
        r'(?:<(?P<pri>\d+|(?P<facility>\w+)\.(?P<severity>\w+))>)?'
        # the timestamp+hostname header
        # XXX:  hostname may be missing
        #       at least in Ubuntu 11.10 syslogd 'last message repeated 2 times'...
        + r'(?P<timestamp>\w{3} [0-9 ][0-9] \d{2}:\d{2}:\d{2}|.+?) '
        + r'(?P<hostname>\S+)? '
        # the message, including possible tag/pid
        + r"(?P<message>(?P<tag>(?P<program>[^:\]]+)(?:\[(?P<pid>\d+)\])?: )?(?P<text>.*))\n?"
    )

    def __init__(self, raw=False, facility=None, severity=None):
        """
            raw         - treat every line as a bare message, without parsing any
                          syslog header out of it
            facility    - default facility (name or numeric code) for lines
                          without a <pri> prefix
            severity    - default severity (name or numeric code) for lines
                          without a <pri> prefix
        """

        self.raw = raw
        self.facility = facility
        self.severity = severity

    def _pri_item(self, pri, facility, severity):
        """
            Build the pri/severity/facility part of an item, translating known
            numeric codes to their names and passing anything else through.
        """

        return dict(
            pri=pri,
            severity=self.SEVERITIES.get(severity, severity),
            facility=self.FACILITIES.get(facility, facility),
        )

    def parse_pri(self, match):
        """
            Parse pri/facility/severity from a SYSLOG_RE match.

            A numeric <pri> is split into facility = pri // 8 and
            severity = pri % 8. A textual <facility.severity> is used as given.
            Without any prefix, the defaults given to the constructor apply.

            Returns a dict with the keys pri, severity and facility.
        """

        pri = match.group('pri')
        facility = match.group('facility') or self.facility
        severity = match.group('severity') or self.severity

        if pri and pri.isdigit():
            pri = int(pri)
            facility, severity = divmod(pri, 8)

        return self._pri_item(pri, facility, severity)

    def parse_timestamp(self, match):
        """
            Parse timestamp from a SYSLOG_RE match into a datetime.

            Tries the BSD syslog format first, then RFC3339.

            Returns None if neither format matches, or if the timestamp matches
            a format but is not a valid date (which is logged as a warning).
        """

        timestamp = match.group('timestamp')

        # timestamp, in various formats
        try:
            return rfc3164(timestamp) or rfc3339(timestamp)

        except ValueError as ex:
            # skip it
            log.warning("timestamp: %s:", timestamp, exc_info=ex)
            return None

    def parse_prog(self, match):
        """
            Parse the program name from a SYSLOG_RE match.

            Returns None if the message has no tag.
        """

        return match.group('program') or None

    def parse(self, line):
        """
            Parse given input line into an item dict.

            In raw mode the stripped line is the message, the timestamp is the
            current time, and facility/severity come from the constructor
            defaults.

            Returns None (and logs a warning) if the line cannot be parsed.
        """

        # ignore whitespace
        line = line.strip()

        if self.raw:
            # no header to parse; everything comes from defaults
            item = dict(
                timestamp=datetime.datetime.now(),  # XXX: None?
                host=None,
                prog=None,
                pid=None,
                msg=line,
            )
            item.update(self._pri_item(None, self.facility, self.severity))

            return item

        match = self.SYSLOG_RE.match(line)

        if not match:
            log.warning("Unparseable syslog message: %r", line)
            return None

        item = dict(
            timestamp=self.parse_timestamp(match),
            host=match.group('hostname'),
            prog=self.parse_prog(match),
            pid=match.group('pid'),
            msg=match.group('text'),
        )

        # facility/severity prefix?
        item.update(self.parse_pri(match))

        return item

    def process(self, lines):
        """
            Yield parsed items from given series of lines, skipping lines that
            fail to parse.
        """

        for line in lines:
            item = self.parse(line)

            log.debug("%s", item)

            if item:
                yield item

    __call__ = process