qmsk/irclogs/log_formatter.py
changeset 140 6db2527b67cf
parent 109 ca82d0fee336
child 151 6b8b6e056cdb
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/qmsk/irclogs/log_formatter.py	Sun Sep 13 01:15:56 2009 +0300
@@ -0,0 +1,257 @@
+"""
+    Format LogLines into some other representation
+"""
+
+import re, xml.sax.saxutils
+
+from log_line import LogTypes
+from log_formatter_pil import PILImageFormatter
+from log_formatter_rss import RSSFormatter
+
+class LogFormatter (object) :
+    """
+        Provides a method to format series of LogLines into various output formats, with varying themes.
+        The format_* methods here are abstract, and raise NotImplementedError unless overridden.
+    """
+
+    # machine-readable name
+    name = None
+
+    # human-readable name
+    title = None
+
+    ## parameters
+    # use a fixed-width font for HTML output
+    html_fixedwidth = True
+
+    def __init__ (self, tz, timestamp_fmt, img_ttf_path, img_font_size) :
+        """
+            Initialize to format timestamps with the given timezone and timestamp format.
+
+            Use the given TTF font to render image text with the given size, if given, otherwise, a default one.
+        """
+
+        # store
+        self.tz = tz
+        self.timestamp_fmt = timestamp_fmt
+        self.img_ttf_path = img_ttf_path
+        self.img_font_size = img_font_size
+
+        # XXX: hardcoded
+        self.date_fmt = '%Y-%m-%d'
+
+    def _format_line_text (self, line, template_dict, type=None, full_timestamp=False, **extra) :
+        """
+            Format the given line as text, using the given { type: string template } dict.
+
+            If type is given, then it overrides line.type. If full_timestamp is set, the timestamp includes the date.
+
+            Any additional keyword args will also be available for the template to use
+
+            Raises Exception if template_dict does not define a template for the type.
+        """
+
+        # default type?
+        if type is None :
+            type = line.type
+
+        # look up the template
+        if type not in template_dict :
+            raise Exception("Format template not defined for type: %s" % LogTypes.name_from_code(type))
+        template = template_dict[type]
+
+        # convert timestamp into display timezone
+        dtz = line.timestamp.astimezone(self.tz)
+
+        # full timestamps?
+        if full_timestamp :
+            # XXX: let the user define a 'datetime' format instead?
+            timestamp_fmt = self.date_fmt + ' ' + self.timestamp_fmt
+        else :
+            timestamp_fmt = self.timestamp_fmt
+
+        # breakdown source
+        source_nickname, source_username, source_hostname, source_chanflag = line.source
+        target_nickname = line.target
+
+        # format with dict
+        return template % dict(
+            channel_name    = line.channel.name,
+            datetime        = dtz.strftime('%a %b %d %H:%M:%S %Y'),
+            date            = dtz.strftime(self.date_fmt),
+            timestamp       = dtz.strftime(timestamp_fmt),
+            source_nickname = source_nickname,
+            source_username = source_username,
+            source_hostname = source_hostname,
+            source_chanflag = source_chanflag,
+            target_nickname = target_nickname,
+            message         = line.data,
+            **extra
+        )
+
+    def format_txt (self, lines, full_timestamps=False) :
+        """
+            Format given lines as plaintext.
+
+            If full_timestamps is given, the output will contain full timestamps with both date and time.
+
+            No trailing newlines.
+        """
+
+        raise NotImplementedError("%s does not implement format_txt" % self.__class__.__name__)
+
+    def format_html (self, lines, full_timestamps=False) :
+        """
+            Format as HTML.
+
+            See format_txt for information about arguments
+        """
+
+        raise NotImplementedError("%s does not implement format_html" % self.__class__.__name__)
+
+    def format_png (self, lines, full_timestamps=False) :
+        """
+            Format as a PNG image, returning the binary PNG data
+        """
+
+        raise NotImplementedError("%s does not implement format_png" % self.__class__.__name__)
+
+    def format_rss (self, lines, full_timestamps=False) :
+        """
+            Format as an XML RSS document
+        """
+
+        raise NotImplementedError("%s does not implement format_rss" % self.__class__.__name__)
+
+class BaseHTMLFormatter (LogFormatter) :
+    """
+        Implements some HTML-formatting utils
+    """
+
+    # parameters
+    html_fixedwidth = True
+
+    # regexp to match http:// and https:// URLs; it is applied to text that is already HTML-escaped
+    URL_REGEXP = re.compile(r"https?://\S+")
+
+    def _process_links (self, line) :
+        """
+            Process the rendered line, adding in <a href>'s for things that look like URLs, returning the new line.
+
+            The line should already be escaped
+        """
+
+        def _encode_url (match) :
+            # the match is escaped HTML, usable as the link text; unescape it to recover the raw URL
+            url_html = match.group(0)
+            url_link = xml.sax.saxutils.unescape(url_html)
+            # quoteattr() supplies the quotes and escapes the value, so a quote in the URL cannot end the href
+            return '<a href=%s>%s</a>' % (xml.sax.saxutils.quoteattr(url_link), url_html)
+
+        return self.URL_REGEXP.sub(_encode_url, line)
+
+    def format_html (self, lines, **kwargs) :
+        """
+            Just uses format_txt (passing kwargs through), but escapes the text and processes links, etc
+        """
+
+        # format using the format_txt of the concrete class
+        for line, txt in self.format_txt(lines, **kwargs) :
+            # escape HTML
+            html = xml.sax.saxutils.escape(txt)
+
+            # process links
+            html = self._process_links(html)
+
+            # yield
+            yield line, html
+
+   
+class IrssiTextFormatter (RSSFormatter, PILImageFormatter, LogFormatter) :
+    """
+        Implements format_txt for irssi-style output
+    """
+
+    # format templates by type, filled in by _format_line_text (the line's data is available as %(message)s)
+    __FMT = {
+        LogTypes.RAW        : "%(timestamp)s %(message)s",
+        LogTypes.LOG_OPEN   : "--- Log opened %(datetime)s",
+        LogTypes.LOG_CLOSE  : "--- Log closed %(datetime)s",
+        'DAY_CHANGED'       : "--- Day changed %(date)s",
+
+        LogTypes.MSG        : "%(timestamp)s <%(source_chanflag)s%(source_nickname)s> %(message)s",
+        LogTypes.NOTICE     : "%(timestamp)s -%(source_nickname)s- %(message)s",
+        LogTypes.ACTION     : "%(timestamp)s  * %(source_nickname)s %(message)s",
+
+        LogTypes.JOIN       : "%(timestamp)s -!- %(source_nickname)s [%(source_username)s@%(source_hostname)s] has joined %(channel_name)s",
+        LogTypes.PART       : "%(timestamp)s -!- %(source_nickname)s [%(source_username)s@%(source_hostname)s] has left %(channel_name)s [%(message)s]",
+        LogTypes.KICK       : "%(timestamp)s -!- %(target_nickname)s was kicked from %(channel_name)s by %(source_nickname)s [%(message)s]",
+        LogTypes.MODE       : "%(timestamp)s -!- mode/%(channel_name)s [%(message)s] by %(source_nickname)s",
+
+        LogTypes.NICK       : "%(timestamp)s -!- %(source_nickname)s is now known as %(target_nickname)s",
+        LogTypes.QUIT       : "%(timestamp)s -!- %(source_nickname)s [%(source_username)s@%(source_hostname)s] has quit [%(message)s]",
+
+        LogTypes.TOPIC      : "%(timestamp)s -!- %(source_nickname)s changed the topic of %(channel_name)s to: %(message)s",
+        'TOPIC_UNSET'       : "%(timestamp)s -!- Topic unset by %(source_nickname)s on %(channel_name)s",
+
+        LogTypes.SELF_NOTICE: "%(timestamp)s -%(source_nickname)s- %(message)s",
+        LogTypes.SELF_NICK  : "%(timestamp)s -!- %(source_nickname)s is now known as %(target_nickname)s",
+
+        LogTypes.NETSPLIT_START : 
+                              "%(timestamp)s -!- Netsplit %(source_hostname)s <-> %(target_nickname)s quits: %(_netsplit_targets)s",
+        LogTypes.NETSPLIT_END :
+                              "%(timestamp)s -!- Netsplit over, joins: %(_netsplit_targets)s",
+    }
+
+    def format_txt (self, lines, full_timestamps=False) :
+        """
+            Yield a (line, text) pair for each of the given lines, formatted using the irssi-style templates.
+        """
+
+        for line in lines :
+            # extra template args, and the template type, which defaults to line.type
+            extra = {}
+            type = line.type
+
+            # special formatting for unset-Topic
+            if line.type == LogTypes.TOPIC and line.data is None :
+                type = 'TOPIC_UNSET'
+
+            # the netsplit templates take the line's data as %(_netsplit_targets)s
+            elif line.type & LogTypes._NETSPLIT_MASK :
+                extra['_netsplit_targets'] = line.data
+
+            # using __FMT
+            yield line, self._format_line_text(line, self.__FMT, type, full_timestamps, **extra)
+
+class IrssiFormatter (BaseHTMLFormatter, IrssiTextFormatter) :
+    """
+        Plain black-and-white irssi-style formatting: the irssi text output, plus the HTML conversion of it
+    """
+
+    # human-readable title, and the machine-readable name
+    title = "Irssi (plain)"
+    name = 'irssi'
+
+class DebugFormatter (BaseHTMLFormatter) :
+    """
+        Raw debug-style formatting: every LogLine is dumped as its own unicode() representation
+    """
+
+    # machine-readable name and human-readable title
+    name = 'debug'
+    title = "Raw debugging format"
+
+    def format_txt (self, lines, full_timestamps=False) :
+        # full_timestamps is accepted to match the format_txt interface, but has no effect here
+        for log_line in lines :
+            text = unicode(log_line)
+            yield log_line, text
+
+# registry of the concrete formatter classes, keyed by their machine-readable name
+FORMATTERS = dict((cls.name, cls) for cls in (IrssiFormatter, DebugFormatter))
+
+def by_name (name) :
+    """Lookup and return a LogFormatter class by its machine-readable name; raises KeyError if unknown"""
+    return FORMATTERS[name]
+