"""
    Log messages (e.g. syslog)
"""

# stdlib syslog, for facilities and levels
import syslog
import time
import datetime
import re
import os.path

# Severity code (PRI % 8) -> name. The stdlib severity constants are the raw
# 0..7 codes, so they can be used as keys directly.
PRIORITY_NAMES = {
    syslog.LOG_EMERG:   'emerg',
    syslog.LOG_ALERT:   'alert',
    syslog.LOG_CRIT:    'crit',
    syslog.LOG_ERR:     'err',
    syslog.LOG_WARNING: 'warning',
    syslog.LOG_NOTICE:  'notice',
    syslog.LOG_INFO:    'info',
    syslog.LOG_DEBUG:   'debug',
}

# Facility code (PRI // 8) -> name.
#
# The stdlib facility constants are exposed *pre-shifted* by three bits
# (e.g. syslog.LOG_USER == 8, syslog.LOG_LOCAL0 == 128), whereas the lookup in
# SyslogMessage._parse_pri uses the unshifted code. Shift them back down so
# that the keys really are the 0..23 codes noted in the trailing comments.
FACILITY_NAMES = {
    syslog.LOG_KERN >> 3:   'kern',     # 0
    syslog.LOG_USER >> 3:   'user',     # 1
    syslog.LOG_MAIL >> 3:   'mail',     # 2
    syslog.LOG_DAEMON >> 3: 'daemon',   # 3
    syslog.LOG_AUTH >> 3:   'auth',     # 4
    5:                      'syslog',   # 5
    syslog.LOG_LPR >> 3:    'lpr',      # 6
    syslog.LOG_NEWS >> 3:   'news',     # 7
    syslog.LOG_UUCP >> 3:   'uucp',     # 8
    syslog.LOG_CRON >> 3:   'cron',     # 9
    10:                     'authpriv', # 10
    11:                     'ftp',      # 11
    12:                     'ntp',      # 12
    13:                     'audit',    # 13
    14:                     'alert',    # 14
    15:                     'clock',    # 15
    syslog.LOG_LOCAL0 >> 3: 'local0',   # 16
    syslog.LOG_LOCAL1 >> 3: 'local1',   # 17
    syslog.LOG_LOCAL2 >> 3: 'local2',   # 18
    syslog.LOG_LOCAL3 >> 3: 'local3',   # 19
    syslog.LOG_LOCAL4 >> 3: 'local4',   # 20
    syslog.LOG_LOCAL5 >> 3: 'local5',   # 21
    syslog.LOG_LOCAL6 >> 3: 'local6',   # 22
    syslog.LOG_LOCAL7 >> 3: 'local7',   # 23
}


class SyslogMessage(object):
    """
        A message from syslog with the following fields:

            pri         - (optional) raw integer priority field
            priority    - (optional) message priority as text
            facility    - (optional) message facility as text
            timestamp   - message timestamp as a datetime
            hostname    - originating hostname
            tag         - (optional) message tag including process name and id
            program     - (optional) process name part of message tag
            pid         - (optional) process ID part of message tag
            text        - the log message following the tag
            message     - the full log message including the tag
            raw         - the full syslog-format message

        Optional fields are None when the corresponding part is absent from the line.
    """

    # the regular expression used to parse the lines
    LINE_RE = re.compile(
        # the priority field as in raw syslog messages
        r"(?:<(?P<pri>\d+)>)?"

        # the timestamp+hostname header
        + r"(?P<timestamp>\w{3} [0-9 ]\d \d{2}:\d{2}:\d{2}) (?P<hostname>\S+)"

        # the message, including possible tag/pid
        + r" (?P<message>(?P<tag>(?P<program>[^:\]]+)(?:\[(?P<pid>\d+)\])?: )?(?P<text>.*))\n?"
    )

    # strptime format of timestamp
    TIMESTAMP_FMT = "%b %d %H:%M:%S"

    def _parse_pri(self, match):
        """
            Parse the priority/facility from the given match object.

            Sets self.pri, self.priority and self.facility; all three are None when the line has no <PRI> prefix.
            Codes without a known name are kept as their decimal string.
        """

        raw_pri = match.group('pri')

        # raw integer
        self.pri = int(raw_pri) if raw_pri else None

        if self.pri is None:
            self.priority = self.facility = None
            return

        # unpack; compare against None above, as 0 (kern.emerg) is a perfectly valid value
        facility, priority = divmod(self.pri, 8)

        # translate to names
        self.priority = PRIORITY_NAMES.get(priority, str(priority))
        self.facility = FACILITY_NAMES.get(facility, str(facility))

    def _parse_timestamp(self, match):
        """
            Parse the timestamp field into a datetime.datetime from the given match object.

            The syslog header carries no year, so the current local year is assumed.

            Raises ValueError if the timestamp is not a valid date/time.
        """

        timestamp = match.group('timestamp')

        # Feed the assumed year to strptime itself, rather than patching it in afterwards: this way "Feb 29" is
        # validated against the right year, and any failure ends up in the same ValueError below.
        year = time.localtime().tm_year

        try:
            ts = time.strptime("%d %s" % (year, timestamp), "%Y " + self.TIMESTAMP_FMT)

        except ValueError as ex:
            raise ValueError("Invalid timestamp: %s: %s" % (timestamp, ex))

        # year, month, day, hour, minute, second
        self.timestamp = datetime.datetime(*ts[:6])

    def _parse_hostname(self, match):
        """
            Parse the hostname from the given match object
        """

        # nothing much needed..
        self.hostname = match.group('hostname')

    def _parse_message(self, match):
        """
            Parse the message with tag from the given match object.

            Sets self.message, self.tag, self.program, self.pid (as an int, or None) and self.text.
        """

        pid = match.group('pid')

        self.message = match.group('message')
        self.tag = match.group('tag')
        self.program = match.group('program')
        self.pid = int(pid) if pid else None
        self.text = match.group('text')

    def __init__(self, line):
        """
            Construct this message from the given line.

            Raises ValueError if the line does not look like a syslog message, or if its timestamp is invalid.
        """

        # apply regexp
        match = self.LINE_RE.match(line)

        if not match:
            # fail
            raise ValueError("Invalid syslog data format")

        # unpack the various portions
        self._parse_pri(match)
        self._parse_timestamp(match)
        self._parse_hostname(match)
        self._parse_message(match)

        # the raw line as matched
        self.raw = match.group(0)

    def properties(self):
        """
            Return a dict containing the attributes specified for this message
        """

        # XXX: ugh... doesn't do @property
        return dict(
            facility    = self.facility,
            priority    = self.priority,
            timestamp   = self.timestamp,
            hostname    = self.hostname,
            tag         = self.tag,
            program     = self.program,
            pid         = self.pid,
            text        = self.text,
            message     = self.message,
        )

    @property
    def normalized_program(self):
        """
            Returns a normalized version of the program name:
                - lowercase
                - no path components

            Returns None if there is no program tag
        """

        if self.program is None:
            return None

        return os.path.basename(self.program).lower()

    def match_program(self, program):
        """
            If the given value is False, checks that this message does *not* have a program tag.

            Otherwise, does a case-insensitive compare of the given value against the basename() of the message
            tag's program name portion. Only the message's program name is reduced to its basename; the given value
            is merely lowercased.
        """

        if program is False:
            # only match if no tag
            return self.program is None

        if self.program is None:
            # no program to match against...
            return False

        # match against normalized name
        return self.normalized_program == program.lower()

    def match(self, regexp=None, program=None):
        """
            Evaluate a match against this message using the given criteria

                regexp      - match regexp against the message's contents
                program     - match program component of tag against given value, may also be False

            Returns False if any of the given criteria fails, and otherwise a dict of the regexp's named groups.

            Note that the dict is empty - and thus falsy - when no regexp is given or when it has no named groups,
            so test the result with `is not False` rather than by truthiness.
        """

        params = {}

        if program is not None and not self.match_program(program):
            # program did not match
            return False

        ## pattern
        if regexp is not None:
            # apply regexp, anchored at the start of the text
            match = regexp.match(self.text)

            if not match:
                # no match
                return False

            # good, params
            params.update(match.groupdict())

        # we appear to have a match
        return params

    def __str__(self):
        """
            Format to default format
        """

        return "%s %s %s" % (self.timestamp.strftime(self.TIMESTAMP_FMT), self.hostname, self.message)

    def __repr__(self):
        return "pri=%s:%s, timestamp=%s, hostname=%s, tag=%s[%s], text=%r" % (
            self.facility, self.priority, self.timestamp.isoformat(), self.hostname, self.program, self.pid, self.text
        )