log_formatter.py
author Tero Marttila <terom@fixme.fi>
Mon, 16 Feb 2009 02:55:17 +0200
changeset 136 c69a176b3620
parent 109 ca82d0fee336
permissions -rw-r--r--
better string truncation for error messages
"""
    Format LogLines into some other representation
"""

import re, xml.sax.saxutils

from log_line import LogTypes
from log_formatter_pil import PILImageFormatter
from log_formatter_rss import RSSFormatter

class LogFormatter (object) :
    """
        Provides a method to format series of LogLines into various output formats, with varying themes.

        This is the abstract base: the format_* methods defined here only raise NotImplementedError, and are meant
        to be overridden by subclasses/mixins. The shared _format_line_text helper does the actual template expansion.
    """

    # machine-readable name
    name = None

    # human-readable name
    title = None

    ## parameters
    # use a fixed-width font for HTML output
    html_fixedwidth = True

    def __init__ (self, tz, timestamp_fmt, img_ttf_path, img_font_size) :
        """
            Initialize to format timestamps with the given timezone and timestamp format.

            tz              - tzinfo that line timestamps are converted into before display
            timestamp_fmt   - strftime format used for the per-line timestamp
            img_ttf_path    - stored as-is; TTF font to render image text with
            img_font_size   - stored as-is; font size to render image text with
        """
        
        # store
        self.tz = tz
        self.timestamp_fmt = timestamp_fmt
        self.img_ttf_path = img_ttf_path
        self.img_font_size = img_font_size
        
        # XXX: hardcoded
        self.date_fmt = '%Y-%m-%d'
    
    def _format_line_text (self, line, template_dict, type=None, full_timestamp=False, **extra) :
        """
            Format the given line as text, using the given { type: string template } dict.
            
            If type is given, then it overrides line.type

            If full_timestamp is given, the 'timestamp' value contains both the date and the time.

            Any additional keyword args will also be available for the template to use

            The template is a %-style format string with named keys, which may use:
                channel_name, datetime, date, timestamp,
                source_nickname, source_username, source_hostname, source_chanflag,
                target_nickname, message

            Raises Exception if template_dict has no template for the type.
        """

        # default type?
        if type is None :
            type = line.type
            
        # look up the template
        if type not in template_dict :
            raise Exception("Format template not defined for type: %s" % LogTypes.name_from_code(type))

        template = template_dict[type]
        
        # convert timestamp into display timezone
        dtz = line.timestamp.astimezone(self.tz)
        
        # full timestamps?
        if full_timestamp :
            # XXX: let the user define a 'datetime' format instead?
            timestamp_fmt = self.date_fmt + ' ' + self.timestamp_fmt

        else :
            timestamp_fmt = self.timestamp_fmt
        
        # breakdown source, which must be a 4-item sequence
        source_nickname, source_username, source_hostname, source_chanflag = line.source
        target_nickname = line.target
        
        # format with dict
        return template % dict(
            channel_name    = line.channel.name,
            datetime        = dtz.strftime('%a %b %d %H:%M:%S %Y'),
            date            = dtz.strftime(self.date_fmt),
            timestamp       = dtz.strftime(timestamp_fmt),
            source_nickname = source_nickname,
            source_username = source_username,
            source_hostname = source_hostname,
            source_chanflag = source_chanflag,
            target_nickname = target_nickname,
            message         = line.data,
            **extra
        )
    
    def format_txt (self, lines, full_timestamps=False) :
        """
            Format given lines as plaintext.

            If full_timestamps is given, the output will contain full timestamps with both date and time.

            No trailing newlines.

            Abstract: raises NotImplementedError unless overridden.
        """

        raise NotImplementedError("%s does not implement format_txt" % self.__class__.__name__)

    def format_html (self, lines, full_timestamps=False) :
        """
            Format as HTML.
            
            See format_txt for information about arguments

            Abstract: raises NotImplementedError unless overridden.
        """

        raise NotImplementedError("%s does not implement format_html" % self.__class__.__name__)
    
    def format_png (self, lines, full_timestamps=False) :
        """
            Format as a PNG image, returning the binary PNG data

            Abstract: raises NotImplementedError unless overridden.
        """

        raise NotImplementedError("%s does not implement format_png" % self.__class__.__name__)
    
    def format_rss (self, lines, full_timestamps=False) :
        """
            Format as an XML RSS document

            Abstract: raises NotImplementedError unless overridden.
        """
        
        raise NotImplementedError("%s does not implement format_rss" % self.__class__.__name__)

class BaseHTMLFormatter (LogFormatter) :
    """
        Implements some HTML-formatting utils

        format_html is built on top of the format_txt method, which must be provided elsewhere in the class hierarchy.
    """
    
    # parameters
    html_fixedwidth = True

    # regexp to match URLs, both http:// and https://
    URL_REGEXP = re.compile(r"https?://\S+")

    def _process_links (self, line) :
        """
            Processed the rendered line, adding in <a href>'s for things that look like URLs, returning the new line.

            The line should already be escaped
        """

        def _encode_url (match) :
            # the matched text is already HTML-escaped, so it can be used as the link text as-is
            url_html = match.group(0)

            # recover the real URL, and then re-encode it as a complete quoted attribute value.
            # quoteattr also takes care of any quote characters in the URL, which escape() leaves alone, and which
            # would otherwise let the logged text break out of the href attribute
            url_attr = xml.sax.saxutils.quoteattr(xml.sax.saxutils.unescape(url_html))

            return '<a href=%(url_attr)s>%(url_html)s</a>' % dict(url_attr=url_attr, url_html=url_html)

        return self.URL_REGEXP.sub(_encode_url, line)
 
    def format_html (self, lines, **kwargs) :
        """
            Just uses format_txt, but processes links, etc

            Any keyword arguments are passed through to format_txt. Yields (line, html) tuples.
        """
        
        # format as text first
        for line, txt in self.format_txt(lines, **kwargs) :
            # escape HTML
            html = xml.sax.saxutils.escape(txt)

            # process links
            html = self._process_links(html)

            # yield
            yield line, html

   
class IrssiTextFormatter (RSSFormatter, PILImageFormatter, LogFormatter) :
    """
        Implements format_txt for irssi-style output
    """

    # format definitions by type
    # the keys are LogTypes codes, plus the 'DAY_CHANGED'/'TOPIC_UNSET' pseudo-types.
    # the templates may only use the keys provided by _format_line_text, plus the extra _netsplit_targets
    __FMT = {
        # the line's data is provided to the templates as 'message'
        LogTypes.RAW        : "%(timestamp)s %(message)s",
        LogTypes.LOG_OPEN   : "--- Log opened %(datetime)s",
        LogTypes.LOG_CLOSE  : "--- Log closed %(datetime)s",
        'DAY_CHANGED'       : "--- Day changed %(date)s",

        LogTypes.MSG        : "%(timestamp)s <%(source_chanflag)s%(source_nickname)s> %(message)s",
        LogTypes.NOTICE     : "%(timestamp)s -%(source_nickname)s- %(message)s",
        LogTypes.ACTION     : "%(timestamp)s  * %(source_nickname)s %(message)s",

        LogTypes.JOIN       : "%(timestamp)s -!- %(source_nickname)s [%(source_username)s@%(source_hostname)s] has joined %(channel_name)s",
        LogTypes.PART       : "%(timestamp)s -!- %(source_nickname)s [%(source_username)s@%(source_hostname)s] has left %(channel_name)s [%(message)s]",
        LogTypes.KICK       : "%(timestamp)s -!- %(target_nickname)s was kicked from %(channel_name)s by %(source_nickname)s [%(message)s]",
        LogTypes.MODE       : "%(timestamp)s -!- mode/%(channel_name)s [%(message)s] by %(source_nickname)s",

        LogTypes.NICK       : "%(timestamp)s -!- %(source_nickname)s is now known as %(target_nickname)s",
        LogTypes.QUIT       : "%(timestamp)s -!- %(source_nickname)s [%(source_username)s@%(source_hostname)s] has quit [%(message)s]",

        LogTypes.TOPIC      : "%(timestamp)s -!- %(source_nickname)s changed the topic of %(channel_name)s to: %(message)s",
        'TOPIC_UNSET'       : "%(timestamp)s -!- Topic unset by %(source_nickname)s on %(channel_name)s",

        LogTypes.SELF_NOTICE: "%(timestamp)s -%(source_nickname)s- %(message)s",
        LogTypes.SELF_NICK  : "%(timestamp)s -!- %(source_nickname)s is now known as %(target_nickname)s",

        LogTypes.NETSPLIT_START : 
                              "%(timestamp)s -!- Netsplit %(source_hostname)s <-> %(target_nickname)s quits: %(_netsplit_targets)s",
        LogTypes.NETSPLIT_END :
                              "%(timestamp)s -!- Netsplit over, joins: %(_netsplit_targets)s",
    }

    def format_txt (self, lines, full_timestamps=False) :
        """
            Format the given lines as irssi-style plaintext, yielding (line, text) tuples.

            If full_timestamps is given, the timestamps will contain both date and time.
        """

        # ...handle each line
        for line in lines :
            # extra template args
            extra = {}
            
            # default to line.type
            fmt_type = line.type

            # special formatting for unset-Topic
            if line.type == LogTypes.TOPIC and line.data is None :
                fmt_type = 'TOPIC_UNSET'
            
            # format netsplit stuff
            elif line.type & LogTypes._NETSPLIT_MASK :
                # the netsplit templates take the line data as _netsplit_targets
                extra['_netsplit_targets'] = line.data

            # using __FMT
            yield line, self._format_line_text(line, self.__FMT, fmt_type, full_timestamps, **extra)

class IrssiFormatter (BaseHTMLFormatter, IrssiTextFormatter) :
    """
        Plain black-and-white irssi-style formatter.

        Has no code of its own: it combines the HTML utils of BaseHTMLFormatter with the irssi-style text
        formatting of IrssiTextFormatter, and just gives the result a name.
    """

    # human-readable title, and the machine-readable name
    title = "Irssi (plain)"
    name = 'irssi'

class DebugFormatter (BaseHTMLFormatter) :
    """
        Dumps each LogLine in its raw unicode() form, for debugging
    """

    # human-readable title, and the machine-readable name
    title = "Raw debugging format"
    name = 'debug'

    def format_txt (self, lines, full_timestamps=False) :
        """
            Yield (line, text) tuples, where the text is simply unicode(line).

            full_timestamps is accepted to match the other formatters, but is not used here.
        """

        for log_line in lines :
            # no templates involved, the line object renders itself
            dump = unicode(log_line)

            yield log_line, dump

def by_name (name) :
    """
        Return the LogFormatter class registered under the given name.

        This is a plain FORMATTERS[name] lookup, with no fallback for unknown names.

        NOTE(review): FORMATTERS is not defined in the part of the file seen here; presumably a
        { name: LogFormatter class } dict - confirm.
    """

    formatter_cls = FORMATTERS[name]

    return formatter_cls