# HG changeset patch # User Tero Marttila # Date 1234314311 -7200 # Node ID 6165f1ba458dc1a767ad41cddab6eaf9c574f09c # Parent d30c88e89a7edbf4b52ca2fe0a2972b3141220dd implement parser/formatter netsplits and day-change diff -r d30c88e89a7e -r 6165f1ba458d log_formatter.py --- a/log_formatter.py Wed Feb 11 03:04:35 2009 +0200 +++ b/log_formatter.py Wed Feb 11 03:05:11 2009 +0200 @@ -29,17 +29,23 @@ Use the given TTF font to render image text with the given size, if given, otherwise, a default one. """ - + + # store self.tz = tz self.timestamp_fmt = timestamp_fmt self.img_ttf_path = img_ttf_path self.img_font_size = img_font_size + + # XXX: hardcoded + self.date_fmt = '%Y-%m-%d' - def _format_line_text (self, line, template_dict, type=None, full_timestamp=False) : + def _format_line_text (self, line, template_dict, type=None, full_timestamp=False, **extra) : """ Format the given line as text, using the given { type: string template } dict. If type is given, then it overrides line.type + + Any additional keyword args will also be available for the template to use """ # default type? @@ -47,15 +53,19 @@ type = line.type # look up the template - template = template_dict[type] + if type in template_dict : + template = template_dict[type] + + else : + raise Exception("Format template not defined for type: %s" % LogTypes.name_from_code(type)) # convert timestamp into display timezone dtz = line.timestamp.astimezone(self.tz) # full timestamps? if full_timestamp : - # XXX: ugly hack - timestamp_fmt = '%Y-%m-%d ' + self.timestamp_fmt + # XXX: let the user define a 'datetime' format instead? 
+ timestamp_fmt = self.date_fmt + self.timestamp_fmt else : timestamp_fmt = self.timestamp_fmt @@ -68,6 +78,7 @@ return template % dict( channel_name = line.channel.name, datetime = dtz.strftime('%a %b %d %H:%M:%S %Y'), + date = dtz.strftime(self.date_fmt), timestamp = dtz.strftime(timestamp_fmt), source_nickname = source_nickname, source_username = source_username, @@ -75,6 +86,7 @@ source_chanflag = source_chanflag, target_nickname = target_nickname, message = line.data, + **extra ) def format_txt (self, lines, full_timestamps=False) : @@ -165,6 +177,7 @@ LogTypes.RAW : "%(timestamp)s %(data)s", LogTypes.LOG_OPEN : "--- Log opened %(datetime)s", LogTypes.LOG_CLOSE : "--- Log closed %(datetime)s", + 'DAY_CHANGED' : "--- Day changed %(date)s", LogTypes.MSG : "%(timestamp)s <%(source_chanflag)s%(source_nickname)s> %(message)s", LogTypes.NOTICE : "%(timestamp)s -%(source_nickname)s- %(message)s", @@ -183,20 +196,33 @@ LogTypes.SELF_NOTICE: "%(timestamp)s -%(source_nickname)s- %(message)s", LogTypes.SELF_NICK : "%(timestamp)s -!- %(source_nickname)s is now known as %(target_nickname)s", + + LogTypes.NETSPLIT_START : + "%(timestamp)s -!- Netsplit %(source_hostname)s <-> %(target_nickname)s quits: %(_netsplit_targets)s", + LogTypes.NETSPLIT_END : + "%(timestamp)s -!- Netsplit over, joins: %(_netsplit_targets)s", } def format_txt (self, lines, full_timestamps=False) : # ...handle each line for line in lines : - # specialcases + # extra args + extra = {} + + # specialcase type? 
if line.type == LogTypes.TOPIC and line.data is None : type = 'TOPIC_UNSET' else : type = line.type + # format netsplit stuff + if line.type & LogTypes._NETSPLIT_MASK : + # format the netsplit-targets stuff + extra['_netsplit_targets'] = line.data + # using __TYPES - yield line, self._format_line_text(line, self.__FMT, type, full_timestamps) + yield line, self._format_line_text(line, self.__FMT, type, full_timestamps, **extra) class IrssiFormatter (BaseHTMLFormatter, IrssiTextFormatter) : """ @@ -220,7 +246,7 @@ # iterate for line in lines : # just dump - yield line, str(line) + yield line, unicode(line) def by_name (name) : """ diff -r d30c88e89a7e -r 6165f1ba458d log_line.py --- a/log_line.py Wed Feb 11 03:04:35 2009 +0200 +++ b/log_line.py Wed Feb 11 03:05:11 2009 +0200 @@ -81,6 +81,14 @@ # we () changed nickname to ('SELF_NICK', 0x52), + + ## slightly weirder bits + # netsplit between and , is a space-separated list of s affected + # the last item in the list of nicknames may also be of the form "+", where count is the number of additional, but hidden, nicknames affected + ('NETSPLIT_START', 0x61), + + # netsplit over, is a list of users affected, see NETSPLIT_START + ('NETSPLIT_END', 0x062), ] @classmethod @@ -95,6 +103,9 @@ for name, code in LogTypes.LIST : setattr(LogTypes, name, code) +# masks +LogTypes._NETSPLIT_MASK = 0x60 + class LogLine (object) : """ An event on some specific channel @@ -157,7 +168,7 @@ '@' + host if host else '' ) - def __str__ (self) : + def __unicode__ (self) : return '\t'.join(( self.channel.name, str(self.offset), @@ -165,7 +176,7 @@ str(self.timestamp), self.format_source(), str(self.target), - str(self.data) + unicode(self.data) )) def __repr__ (self) : diff -r d30c88e89a7e -r 6165f1ba458d log_parser.py --- a/log_parser.py Wed Feb 11 03:04:35 2009 +0200 +++ b/log_parser.py Wed Feb 11 03:05:11 2009 +0200 @@ -12,7 +12,7 @@ Parsing some line failed """ - def __init__ (self, offset, line, message) : + def __init__ (self, line, 
offset, message) : super(LogParseError, self).__init__("%r@%s: %s" % (line, offset, message)) class LogParser (object) : @@ -56,6 +56,8 @@ _CHAN2 = r'(?P.+?)' _USERHOST = r'(?P.*?)@(?P.*?)' _MSG = r'(?P.*)' + _SRV1 = r'(?P.+?)' + _SRV2 = r'(?P.+?)' # regular expressions for matching lines, by type TYPE_EXPRS = ( @@ -73,7 +75,12 @@ ( LogTypes.TOPIC, _TS + r' -!- (' + _NICK + ' changed the topic of ' + _CHAN + ' to: (?P.*)|Topic unset by ' + _NICK2 + ' on ' + _CHAN2 + ')' ), ( LogTypes.SELF_NOTICE, _TS + r' \[notice\(' + _CHAN + '\)\] ' + _MSG ), - ( LogTypes.SELF_NICK, _TS + r' -!- You\'re now known as (?P\S+)' ), + ( LogTypes.SELF_NICK, _TS + r' -!- You\'re now known as (?P\S+)' ), + + ( LogTypes.NETSPLIT_START, _TS + r' -!- Netsplit ' + _SRV1 + ' <-> ' + _SRV2 + ' quits: (?P[^(]+)( \(\+(?P\d+) more,\S+\))?'), + ( LogTypes.NETSPLIT_END, _TS + r' -!- Netsplit over, joins: (?P[^(]+)( \(\+(?P\d+) more\))?' ), + + ( 'DAY_CHANGED', r'--- Day changed (?P.+)' ), ) # precompile @@ -104,7 +111,7 @@ # no match found? if not match : - raise LogParseError(offset, line, "Line did not match any type") + raise LogParseError(line, offset, "Line did not match any type") # match groups groups = match.groupdict(None) @@ -122,9 +129,13 @@ if date : dt = dt.replace(year=date.year, month=date.month, day=date.day) + elif 'date' in groups : + # parse date-only datetime + dt = datetime.datetime.strptime(groups['date'], '%a %b %d %Y') + else : # no timestamp !? 
- raise LogParseError(offset, line, "No timestamp") + raise LogParseError(line, offset, "No timestamp") # now localize with timezone dtz = self.tz.localize(dt) @@ -133,10 +144,18 @@ channel_name = (groups.get('channel') or groups.get('channel2')) # source - source = (groups.get('nickname') or groups.get('nickname2'), groups.get('username'), groups.get('hostname'), groups.get('flags')) + if 'server1' in groups : + source = (None, None, groups.get('server1'), None) + + else : + source = (groups.get('nickname') or groups.get('nickname2'), groups.get('username'), groups.get('hostname'), groups.get('flags')) # target - target = groups.get('target') + if 'server2' in groups : + target = groups.get('server2') + + else : + target = groups.get('target') # data if 'message' in groups : @@ -148,11 +167,27 @@ elif 'topic' in groups : data = groups['topic'] + elif 'nick_list' in groups : + # split into components + list = groups['nick_list'].split(', ') + + # additional count? + if 'count' in groups and groups['count'] : + list.append('+%d' % int(groups['count'])) + + # join + data = ' '.join(list) + else : data = None + + # custom types? 
+ if type == 'DAY_CHANGED' : + # new date + date = dtz - # build+return LogLine - return LogLine(channel, offset, type, dtz, source, target, data) + # build+return (date, LogLine) + return date, LogLine(channel, offset, type, dtz, source, target, data) def parse_lines (self, channel, lines, date=None, starting_offset=None) : """ @@ -169,7 +204,8 @@ # try and parse try : - line = self.parse_line(channel, line, date, offset) + # update date as needed + date, line = self.parse_line(channel, line, date, offset) # passthrough LogParseError's except LogParseError : diff -r d30c88e89a7e -r 6165f1ba458d log_source.py --- a/log_source.py Wed Feb 11 03:04:35 2009 +0200 +++ b/log_source.py Wed Feb 11 03:05:11 2009 +0200 @@ -377,7 +377,7 @@ # convert to date and use that return self._get_logfile_date(dtz.date()) - def _get_logfile_date (self, d, load=True, stat=True, ignore_missing=True) : + def _get_logfile_date (self, d, load=True, stat=False, ignore_missing=True) : """ Get the logfile corresponding to the given naive date in our timezone.