implement parser/formatter netsplits and day-change
author Tero Marttila <terom@fixme.fi>
Wed, 11 Feb 2009 03:05:11 +0200
changeset 97 6165f1ba458d
parent 96 d30c88e89a7e
child 98 8c6e36849f9a
implement parser/formatter netsplits and day-change
log_formatter.py
log_line.py
log_parser.py
log_source.py
--- a/log_formatter.py	Wed Feb 11 03:04:35 2009 +0200
+++ b/log_formatter.py	Wed Feb 11 03:05:11 2009 +0200
@@ -29,17 +29,23 @@
 
             Use the given TTF font to render image text with the given size, if given, otherwise, a default one.
         """
-
+        
+        # store
         self.tz = tz
         self.timestamp_fmt = timestamp_fmt
         self.img_ttf_path = img_ttf_path
         self.img_font_size = img_font_size
+        
+        # XXX: hardcoded
+        self.date_fmt = '%Y-%m-%d'
     
-    def _format_line_text (self, line, template_dict, type=None, full_timestamp=False) :
+    def _format_line_text (self, line, template_dict, type=None, full_timestamp=False, **extra) :
         """
             Format the given line as text, using the given { type: string template } dict.
             
             If type is given, then it overrides line.type
+
+            Any additional keyword args will also be available for the template to use
         """
 
         # default type?
@@ -47,15 +53,19 @@
             type = line.type
             
         # look up the template
-        template = template_dict[type]
+        if type in template_dict :
+            template = template_dict[type]
+
+        else :
+            raise Exception("Format template not defined for type: %s" % LogTypes.name_from_code(type))
         
         # convert timestamp into display timezone
         dtz = line.timestamp.astimezone(self.tz)
         
         # full timestamps?
         if full_timestamp :
-            # XXX: ugly hack
-            timestamp_fmt = '%Y-%m-%d ' + self.timestamp_fmt
+            # XXX: let the user define a 'datetime' format instead?
+            timestamp_fmt = self.date_fmt + self.timestamp_fmt
 
         else :
             timestamp_fmt = self.timestamp_fmt
@@ -68,6 +78,7 @@
         return template % dict(
             channel_name    = line.channel.name,
             datetime        = dtz.strftime('%a %b %d %H:%M:%S %Y'),
+            date            = dtz.strftime(self.date_fmt),
             timestamp       = dtz.strftime(timestamp_fmt),
             source_nickname = source_nickname,
             source_username = source_username,
@@ -75,6 +86,7 @@
             source_chanflag = source_chanflag,
             target_nickname = target_nickname,
             message         = line.data,
+            **extra
         )
     
     def format_txt (self, lines, full_timestamps=False) :
@@ -165,6 +177,7 @@
         LogTypes.RAW        : "%(timestamp)s %(data)s",
         LogTypes.LOG_OPEN   : "--- Log opened %(datetime)s",
         LogTypes.LOG_CLOSE  : "--- Log closed %(datetime)s",
+        'DAY_CHANGED'       : "--- Day changed %(date)s",
 
         LogTypes.MSG        : "%(timestamp)s <%(source_chanflag)s%(source_nickname)s> %(message)s",
         LogTypes.NOTICE     : "%(timestamp)s -%(source_nickname)s- %(message)s",
@@ -183,20 +196,33 @@
 
         LogTypes.SELF_NOTICE: "%(timestamp)s -%(source_nickname)s- %(message)s",
         LogTypes.SELF_NICK  : "%(timestamp)s -!- %(source_nickname)s is now known as %(target_nickname)s",
+
+        LogTypes.NETSPLIT_START : 
+                              "%(timestamp)s -!- Netsplit %(source_hostname)s <-> %(target_nickname)s quits: %(_netsplit_targets)s",
+        LogTypes.NETSPLIT_END :
+                              "%(timestamp)s -!- Netsplit over, joins: %(_netsplit_targets)s",
     }
 
     def format_txt (self, lines, full_timestamps=False) :
         # ...handle each line
         for line in lines :
-            # specialcases
+            # extra args
+            extra = {}
+
+            # specialcase type?
             if line.type == LogTypes.TOPIC and line.data is None :
                 type = 'TOPIC_UNSET'
             
             else :
                 type = line.type
 
+            # format netsplit stuff
+            if line.type & LogTypes._NETSPLIT_MASK :
+                # format the netsplit-targets stuff
+                extra['_netsplit_targets'] = line.data
+
             # using __TYPES
-            yield line, self._format_line_text(line, self.__FMT, type, full_timestamps)
+            yield line, self._format_line_text(line, self.__FMT, type, full_timestamps, **extra)
 
 class IrssiFormatter (BaseHTMLFormatter, IrssiTextFormatter) :
     """
@@ -220,7 +246,7 @@
         # iterate
         for line in lines :
             # just dump
-            yield line, str(line)
+            yield line, unicode(line)
 
 def by_name (name) :
     """
--- a/log_line.py	Wed Feb 11 03:04:35 2009 +0200
+++ b/log_line.py	Wed Feb 11 03:05:11 2009 +0200
@@ -81,6 +81,14 @@
 
         # we (<source>) changed nickname to <target>
         ('SELF_NICK',   0x52),
+
+        ## slightly weirder bits
+        # netsplit between <source_hostname> and <target_hostname>, <data> is a space-separated list of <chanflags><nickname>s affected
+        # the last item in the list of nicknames may also be of the form "+<count>", where count is the number of additional, but hidden, nicknames affected
+        ('NETSPLIT_START',  0x61),
+
+        # netsplit over, <data> is a list of users affected, see NETSPLIT_START
+        ('NETSPLIT_END',    0x062),
     ]
     
     @classmethod
@@ -95,6 +103,9 @@
 for name, code in LogTypes.LIST :
     setattr(LogTypes, name, code)
 
+# masks
+LogTypes._NETSPLIT_MASK = 0x60
+
 class LogLine (object) :
     """
         An event on some specific channel
@@ -157,7 +168,7 @@
             '@' + host if host else ''
         )
    
-    def __str__ (self) :
+    def __unicode__ (self) :
         return '\t'.join((
             self.channel.name,
             str(self.offset),
@@ -165,7 +176,7 @@
             str(self.timestamp),
             self.format_source(),
             str(self.target),
-            str(self.data)
+            unicode(self.data)
         ))
 
     def __repr__ (self) :
--- a/log_parser.py	Wed Feb 11 03:04:35 2009 +0200
+++ b/log_parser.py	Wed Feb 11 03:05:11 2009 +0200
@@ -12,7 +12,7 @@
         Parsing some line failed
     """
 
-    def __init__ (self, offset, line, message) :
+    def __init__ (self, line, offset, message) :
         super(LogParseError, self).__init__("%r@%s: %s" % (line, offset, message))
 
 class LogParser (object) :
@@ -56,6 +56,8 @@
     _CHAN2 = r'(?P<channel2>.+?)'
     _USERHOST = r'(?P<username>.*?)@(?P<hostname>.*?)'
     _MSG = r'(?P<message>.*)'
+    _SRV1 = r'(?P<server1>.+?)'
+    _SRV2 = r'(?P<server2>.+?)'
 
     # regular expressions for matching lines, by type
     TYPE_EXPRS = (
@@ -73,7 +75,12 @@
         (   LogTypes.TOPIC,         _TS + r' -!- (' + _NICK + ' changed the topic of ' + _CHAN + ' to: (?P<topic>.*)|Topic unset by ' + _NICK2 + ' on ' + _CHAN2 + ')'    ),
 
         (   LogTypes.SELF_NOTICE,   _TS + r' \[notice\(' + _CHAN + '\)\] ' + _MSG                   ),
-        (   LogTypes.SELF_NICK,     _TS + r' -!- You\'re now known as (?P<target>\S+)'           ),
+        (   LogTypes.SELF_NICK,     _TS + r' -!- You\'re now known as (?P<target>\S+)'              ),
+
+        (   LogTypes.NETSPLIT_START,    _TS + r' -!- Netsplit ' + _SRV1 + ' <-> ' + _SRV2 + ' quits: (?P<nick_list>[^(]+)( \(\+(?P<count>\d+) more,\S+\))?'),
+        (   LogTypes.NETSPLIT_END,      _TS + r' -!- Netsplit over, joins: (?P<nick_list>[^(]+)( \(\+(?P<count>\d+) more\))?'              ),
+
+        (   'DAY_CHANGED',          r'--- Day changed (?P<date>.+)'                                 ),
     )
 
     # precompile
@@ -104,7 +111,7 @@
         
         # no match found?
         if not match :
-            raise LogParseError(offset, line, "Line did not match any type")
+            raise LogParseError(line, offset, "Line did not match any type")
         
         # match groups
         groups = match.groupdict(None)
@@ -122,9 +129,13 @@
             if date :
                 dt = dt.replace(year=date.year, month=date.month, day=date.day)
 
+        elif 'date' in groups :
+            # parse date-only datetime
+            dt = datetime.datetime.strptime(groups['date'], '%a %b %d %Y')
+
         else :
             # no timestamp !?
-            raise LogParseError(offset, line, "No timestamp")
+            raise LogParseError(line, offset, "No timestamp")
 
         # now localize with timezone
         dtz = self.tz.localize(dt)
@@ -133,10 +144,18 @@
         channel_name = (groups.get('channel') or groups.get('channel2'))
 
         # source
-        source = (groups.get('nickname') or groups.get('nickname2'), groups.get('username'), groups.get('hostname'), groups.get('flags'))
+        if 'server1' in groups :
+            source = (None, None, groups.get('server1'), None)
+
+        else :
+            source = (groups.get('nickname') or groups.get('nickname2'), groups.get('username'), groups.get('hostname'), groups.get('flags'))
 
         # target
-        target = groups.get('target')
+        if 'server2' in groups :
+            target = groups.get('server2')
+
+        else :
+            target = groups.get('target')
 
         # data
         if 'message' in groups :
@@ -148,11 +167,27 @@
         elif 'topic' in groups :
             data = groups['topic']
         
+        elif 'nick_list' in groups :
+            # split into components
+            list = groups['nick_list'].split(', ')
+            
+            # additional count?
+            if 'count' in groups and groups['count'] :
+                list.append('+%d' % int(groups['count']))
+            
+            # join
+            data = ' '.join(list)
+
         else :
             data = None
+        
+        # custom types?
+        if type == 'DAY_CHANGED' :
+            # new date
+            date = dtz
 
-        # build+return LogLine
-        return LogLine(channel, offset, type, dtz, source, target, data)
+        # build+return (date, LogLine)
+        return date, LogLine(channel, offset, type, dtz, source, target, data)
 
     def parse_lines (self, channel, lines, date=None, starting_offset=None) :
         """
@@ -169,7 +204,8 @@
             
             # try and parse
             try :
-                line = self.parse_line(channel, line, date, offset)
+                # update date as needed
+                date, line = self.parse_line(channel, line, date, offset)
             
             # passthrough LogParseError's
             except LogParseError :
--- a/log_source.py	Wed Feb 11 03:04:35 2009 +0200
+++ b/log_source.py	Wed Feb 11 03:05:11 2009 +0200
@@ -377,7 +377,7 @@
         # convert to date and use that
         return self._get_logfile_date(dtz.date())
 
-    def _get_logfile_date (self, d, load=True, stat=True, ignore_missing=True) :
+    def _get_logfile_date (self, d, load=True, stat=False, ignore_missing=True) :
         """
             Get the logfile corresponding to the given naive date in our timezone.