restructure into package format - the qmsk.* stuff doesn't work so well though, requires a symlink for qmsk.web to work...
--- a/__init__.py Sun Sep 13 00:49:55 2009 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,10 +0,0 @@
-"""
- The irclogs.qmsk.net site is an IRC log browser
-"""
-
-# the URL mapper
-import urls
-
-# our RequestHandler
-handler = urls.mapper
-
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/bin/index.cgi Sun Sep 13 01:15:56 2009 +0300
@@ -0,0 +1,49 @@
+#!/usr/bin/python2.5
+
+"""
+ CGI mode using qmsk.web.cgi
+"""
+
+def error () :
+ """
+ Dumps out a raw traceback of the current exception to stdout, call from except.
+
+ Used for low-level ImportError's
+ """
+
+ import sys
+
+ # if this import fails, we're doomed
+ from qmsk.irclogs import error
+
+ # format info
+ status, content_type, body = error.build_error()
+
+ # HTTP headers+body
+ sys.stdout.write('Status: %s\r\n' % status)
+ sys.stdout.write('Content-type: %s\r\n' % content_type)
+ sys.stdout.write('\r\n')
+ sys.stdout.write(body)
+
+def main () :
+ """
+ Build our wsgi.Application and run
+ """
+
+ try :
+ from qmsk.web import cgi_main
+ from qmsk.irclogs import wsgi
+
+ # create app
+ app = wsgi.Application()
+
+ # run once
+ cgi_main.run(app)
+
+ except :
+ # display error on stdout
+ error()
+
+if __name__ == '__main__' :
+ main()
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/bin/index.fcgi Sun Sep 13 01:15:56 2009 +0300
@@ -0,0 +1,26 @@
+#!/usr/bin/python2.5
+# :set filetype=py
+
+"""
+ FastCGI mode using qmsk.web.fastcgi_main
+"""
+
+from qmsk.web import fastcgi_main
+
+# XXX: error handling for imports? Lighttpd sucks hard at this
+from qmsk.irclogs import wsgi
+
+def main () :
+ """
+ Build our WSGIApplication and run
+ """
+
+ # create app
+ app = wsgi.Application()
+
+ # run once
+ fastcgi_main.run(app)
+
+if __name__ == '__main__' :
+ main()
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/bin/search-index Sun Sep 13 01:15:56 2009 +0300
@@ -0,0 +1,640 @@
+#!/usr/bin/env python2.5
+
+"""
+ Tool for accessing the search index
+"""
+
+# XXX: fix path
+import sys; sys.path.insert(0, '.'); sys.path.insert(0, '..')
+
+import os, os.path, fcntl
+import datetime, pytz
+import optparse
+
+# configuration and the LogSearchIndex module
+from qmsk.irclogs import config, utils, log_search, channels
+
+def _open_index (options, open_mode) :
+ """
+ Opens the LogSearchIndex
+ """
+
+ return log_search.LogSearchIndex(config.LOG_CHANNELS, options.index_path, open_mode)
+
+
+def _open_index_and_channel (options, channel_name, open_mode) :
+ """
+ Opens+returns a LogSearchIndex and a LogChannel
+ """
+
+ # open the LogSearchIndex
+ index = _open_index(options, open_mode)
+
+ # open the channel
+ channel = config.LOG_CHANNELS.lookup(channel_name)
+
+ # return
+ return index, channel
+
+def _iter_insert_stats (index, channel, lines) :
+ """
+ Insert the given lines into the index.
+
+ Assumes the lines will be in time-order, and yields a series of (date, count) tuples for every date that lines
+ are inserted for
+ """
+
+ # last date
+ date = None
+
+ # count
+ count = 0
+
+ # iter lines
+ for line in lines :
+ # next day?
+ if not date or line.timestamp.date() != date :
+ if date :
+ # yield stats
+ yield date, count
+
+ # reset count
+ count = 0
+
+ # timestamp's date
+ date = line.timestamp.date()
+
+ # insert
+ index.insert_line(channel, line)
+
+ # count
+ count += 1
+
+ # final count?
+ if date and count :
+ yield date, count
+
+def _insert_lines (index, options, channel, lines) :
+ """
+ Insert the given lines into the index.
+
+ Assumes the lines will be in time-order, and prints out as status messages the date and count for the inserted lines
+ """
+
+ # iterate insert stats
+ for date, count in _iter_insert_stats(index, channel, lines) :
+ # output date header?
+ if not options.quiet :
+ print "%s: %s" % (date.strftime('%Y-%m-%d'), count),
+
+def _load_channel_date (index, options, channel, date) :
+ """
+ Loads the logs for the given date from the channel's LogSource into the given LogSearchIndex
+ """
+
+ if not options.quiet :
+ print "Loading date for channel %s" % channel.id
+
+ try :
+ # load lines for date
+ lines = channel.source.get_date(date)
+
+ except Exception, e :
+ if not options.skip_missing :
+ raise
+
+ if not options.quiet :
+ print "\tSkipped: %s" % (e, )
+
+ else :
+ # insert
+ _insert_lines(index, options, channel, lines)
+
+def _parse_date (options, date_str, tz=None, fmt='%Y-%m-%d') :
+ """
+    Parse the given datetime, using the given timezone (defaults to options.timezone) and format
+ """
+
+ # default tz
+ if not tz :
+ tz = options.timezone
+
+ try :
+ # parse
+ return datetime.datetime.strptime(date_str, fmt).replace(tzinfo=tz)
+
+ except Exception, e :
+ raise CommandError("[ERROR] Invalid date: %s: %s" % (date_str, e))
+
+def _output_lines (options, lines) :
+ """
+ Display the formatted LogLines
+ """
+
+ # display as plaintext
+ for line, txt_data in options.formatter.format_txt(lines, full_timestamps=True) :
+ print txt_data
+
+class CommandError (Exception) :
+ """
+ Error with command-line arguments
+ """
+
+ pass
+
+def cmd_create (options) :
+ """
+ Creates a new index
+ """
+
+ # open index
+ index = _open_index(options, 'ctrunc' if options.force else 'c')
+
+ # that's all
+ pass
+
+def cmd_load (options, channel_name, *dates) :
+ """
+    Loads the logs for a specific channel for the given dates (in terms of the channel logs' timezone) into the index
+ """
+
+ # open index/channel
+ index, channel = _open_index_and_channel(options, channel_name, 'c' if options.create else 'a')
+
+ # handle each date
+ for date_str in dates :
+        # parse date
+ try :
+ date = _parse_date(options, date_str, channel.source.tz)
+
+ # handle errors
+ except CommandError, e :
+ if options.skip_missing :
+ print "[ERROR] %s" % (date_name, e)
+
+ else :
+ raise
+
+ # otherwise, load
+ else :
+ _load_channel_date(index, options, channel, date)
+
+def cmd_load_month (options, channel_name, *months) :
+ """
+ Loads the logs for a specific channel for the given months (in terms of the channel's timezone) into the index
+ """
+
+ # open index/channel
+ index, channel = _open_index_and_channel(options, channel_name, 'c' if options.create else 'a')
+
+ # handle each date
+ for month_str in months :
+        # parse date
+ try :
+ month = _parse_date(options, month_str, channel.source.tz, '%Y-%m')
+
+ # handle errors
+ except CommandError, e :
+ # skip?
+ if options.skip_missing :
+ if not options.quiet :
+ print "[ERROR] %s" % (date_name, e)
+ continue
+
+ else :
+ raise
+
+ # get the set of days
+ days = list(channel.source.get_month_days(month))
+
+ if not options.quiet :
+ print "Loading %d days of logs:" % (len(days))
+
+ # load each day
+ for date in days :
+ # convert to datetime
+ dt = datetime.datetime.combine(date, datetime.time(0)).replace(tzinfo=channel.source.tz)
+
+ # load
+ _load_channel_date(index, options, channel, dt)
+
+def cmd_search (options, channel_name, query) :
+ """
+ Search the index for events on a specific channel with the given query
+ """
+
+ # sanity-check
+ if options.create :
+ raise Exception("--create doesn't make sense for 'search'")
+
+ # open index/channel
+ index, channel = _open_index_and_channel(options, channel_name, 'r')
+
+ # search
+ lines = index.search_simple(channel, query)
+
+ # display
+ _output_lines(options, lines)
+
+def cmd_list (options, channel_name, *dates) :
+ """
+ List the indexed events for a specific date
+ """
+
+ # sanity-check
+ if options.create :
+ raise Exception("--create doesn't make sense for 'search'")
+
+ # open index/channel
+ index, channel = _open_index_and_channel(options, channel_name, 'r')
+
+ # ...for each date
+ for date_str in dates :
+ # parse date
+ date = _parse_date(options, date_str)
+
+ # list
+ lines = index.list(channel, date)
+
+ # display
+ _output_lines(options, lines)
+
+def _autoload_reset (options, channels) :
+ """
+ Reset old autoload state
+ """
+
+ # warn
+ if not options.quiet :
+ print "[WARN] Resetting autoload state for: %s" % ', '.join(channel.id for channel in channels)
+
+ # iter
+ for channel in channels :
+ # statefile path
+ statefile_path = os.path.join(options.autoload_state_path, 'chan-%s' % channel.id)
+
+ # is it present?
+ if not os.path.exists(statefile_path) :
+ if not options.quiet :
+ print "[WARN] No statefile found at %s" % statefile_path
+
+ else :
+ if not options.quiet :
+ print "\t%s: " % channel.id,
+
+ # remove the statefile
+ os.remove(statefile_path)
+
+ if not options.quiet :
+ print "OK"
+
+def cmd_autoload (options, *channel_names) :
+ """
+ Automatically loads all channel logs that have not been indexed yet (by logfile mtime)
+ """
+
+ # open index, nonblocking
+ index = _open_index(options, 'c?' if options.create else 'a?')
+
+ # default to all channels
+ if not channel_names :
+ channels = config.LOG_CHANNELS
+
+ else :
+ channels = [config.LOG_CHANNELS.lookup(channel_name) for channel_name in channel_names]
+
+ # reset autoload state?
+ if options.reset :
+ _autoload_reset(options, channels)
+ if not options.quiet :
+ print
+
+ # iterate channels
+ for channel in channels :
+ if not options.quiet :
+ print "Channel %s:" % channel.id
+
+ # no 'from' by default
+ after = None
+
+ # path to our state file
+ statefile_path = os.path.join(options.autoload_state_path, 'chan-%s' % channel.id)
+ statefile_tmppath = statefile_path + '.tmp'
+
+ # does it exist?
+ have_tmpfile = os.path.exists(statefile_tmppath)
+
+ # do we have a tempfile from a previous crash?
+ if have_tmpfile and not options.ignore_resume :
+ # first, open it...
+ statefile_tmp = open(statefile_tmppath, 'r+')
+
+ # ... then lock it
+ fcntl.lockf(statefile_tmp, fcntl.LOCK_EX | fcntl.LOCK_NB)
+
+ # read after timestamp
+ after_str = statefile_tmp.read().rstrip()
+
+ if after_str :
+ # parse timestamp
+ after = utils.from_utc_timestamp(int(after_str))
+
+ if not options.quiet :
+ print "\tContinuing earlier progress from %s" % after
+
+ else :
+ # ignore
+ if not options.quiet :
+ print "\t[WARN] Ignoring empty temporary statefile"
+
+ else :
+ # warn about old tmpfile that was ignored
+ if have_tmpfile and not options.quiet :
+ print "\t[WARN] Ignoring old tmpfile state"
+
+ # open new tempfile
+ statefile_tmp = open(statefile_tmppath, 'w')
+
+ # lock
+ fcntl.lockf(statefile_tmp, fcntl.LOCK_EX | fcntl.LOCK_NB)
+
+ # override?
+ if options.reload :
+ # load all
+ mtime = None
+
+ if not options.quiet :
+ print "\tForcing reload!"
+
+ # stat for mtime
+ else :
+ # stat for mtime, None if unknown
+ mtime = utils.mtime(statefile_path, ignore_missing=True)
+
+ if mtime and not options.quiet :
+ print "\tLast load time was %s" % mtime
+
+ elif not options.quiet :
+ print "\t[WARN] No previous load state! Loading full logs"
+
+ # only after some specific date?
+ if options.after :
+ # use unless read from tempfile
+ if not after :
+ after = options.after
+
+ if not options.quiet :
+ print "\tOnly including dates from %s onwards" % after
+
+ else :
+ if not options.quiet :
+ print "\t[WARN] Ignoring --from because we found a tempfile"
+
+ # only up to some specific date?
+ if options.until :
+ until = options.until
+
+ if not options.quiet :
+ print "\tOnly including dates up to (and including) %s" % until
+ else :
+ # default to now
+ until = None
+
+ # get lines
+ lines = channel.source.get_modified(mtime, after, until)
+
+ # insert
+ if not options.quiet :
+ print "\tLoading and inserting..."
+ print
+
+ # iterate insert() per day to display info and update progress
+ for date, count in _iter_insert_stats(index, channel, lines) :
+ # output date header?
+ if not options.quiet :
+ print "\t%10s: %d" % (date.strftime('%Y-%m-%d'), count)
+
+ # write temp state
+ statefile_tmp.seek(0)
+ statefile_tmp.write(str(utils.to_utc_timestamp(datetime.datetime.combine(date, datetime.time(0)))))
+ statefile_tmp.flush()
+
+ # write autoload state
+ open(statefile_path, 'w').close()
+
+ # close+delete tempfile
+ statefile_tmp.close()
+ os.remove(statefile_tmppath)
+
+ if not options.quiet :
+ print
+
+ # done
+ return
+
+def cmd_help (options, *args) :
+ """
+ Help about commands
+ """
+
+ import inspect
+
+ # general help stuff
+ options._parser.print_help()
+
+ # specific command?
+ if args :
+ # the command name
+ command, = args
+
+ # XXX: display info about specific command
+ xxx
+
+ # general
+ else :
+ print
+ print "Available commands:"
+
+ # build list of all cmd_* objects
+ cmd_objects = [(name, obj) for name, obj in globals().iteritems() if name.startswith('cmd_') and inspect.isfunction(obj)]
+
+ # sort alphabetically
+ cmd_objects.sort()
+
+ # iterate through all cmd_* objects
+ for cmd_func_name, cmd_func in cmd_objects :
+ # remove cmd_ prefix
+ cmd_name = cmd_func_name[4:]
+
+ # inspect
+ cmd_args, cmd_varargs, cmd_varkw, cmd_default = inspect.getargspec(cmd_func)
+ cmd_doc = inspect.getdoc(cmd_func)
+
+ # remove the "options" arg
+ cmd_args = cmd_args[1:]
+
+ # display
+ print "\t%10s %-30s : %s" % (cmd_name, inspect.formatargspec(cmd_args, cmd_varargs, None, cmd_default), cmd_doc)
+
+class MyOption (optparse.Option) :
+ """
+ Our custom types for optparse
+ """
+
+ def check_date (option, opt, value) :
+ """
+ Parse a date
+ """
+
+ try :
+ # parse
+ return datetime.datetime.strptime(value, '%Y-%m-%d')
+
+ # trap -> OptionValueError
+ except Exception, e :
+ raise optparse.OptionValueError("option %s: invalid date value: %r" % (opt, value))
+
+ def check_timezone (option, opt, value) :
+ """
+ Parse a timezone
+ """
+
+ try :
+ # parse
+ return pytz.timezone(value)
+
+ # trap -> OptionValueError
+ except Exception, e :
+ raise optparse.OptionValueError("option %s: invalid timezone: %r" % (opt, value))
+
+ def take_action (self, action, dest, opt, value, values, parser) :
+ """
+ Override take_action to handle date
+ """
+
+ if action == "parse_date" :
+ # get timezone
+ tz = values.timezone
+
+ # set timezone
+ value = value.replace(tzinfo=tz)
+
+ # store
+ return optparse.Option.take_action(self, 'store', dest, opt, value, values, parser)
+
+ else :
+ # default
+ return optparse.Option.take_action(self, action, dest, opt, value, values, parser)
+
+ TYPES = optparse.Option.TYPES + ('date', 'timezone')
+ TYPE_CHECKER = optparse.Option.TYPE_CHECKER.copy()
+ TYPE_CHECKER['date'] = check_date
+ TYPE_CHECKER['timezone'] = check_timezone
+ ACTIONS = optparse.Option.ACTIONS + ('parse_date', )
+ STORE_ACTIONS = optparse.Option.STORE_ACTIONS + ('parse_date', )
+ TYPED_ACTIONS = optparse.Option.TYPED_ACTIONS + ('parse_date', )
+ ACTIONS = optparse.Option.ACTIONS + ('parse_date', )
+
+def main (argv) :
+ """
+ Command-line main, with given argv
+ """
+
+ # define parser
+ parser = optparse.OptionParser(
+ usage = "%prog [options] <command> [ ... ]",
+ add_help_option = False,
+ option_class = MyOption,
+ )
+
+ # general options # # # #
+ general = optparse.OptionGroup(parser, "General Options")
+ general.add_option('-h', "--help", dest="help", help="Show this help message and exit",
+ action="store_true" )
+
+ general.add_option( "--formatter", dest="formatter_name", help="LogFormatter to use",
+ metavar="FMT", type="choice", default=config.PREF_FORMATTER_DEFAULT.name,
+ choices=[fmt_name for fmt_name in config.LOG_FORMATTERS.iterkeys()] )
+
+ general.add_option( "--index", dest="index_path", help="Index database path",
+ metavar="PATH", default=config.SEARCH_INDEX_PATH )
+
+ general.add_option( "--timezone", dest="timezone", help="Timezone for output",
+ metavar="TZ", type="timezone", default=pytz.utc )
+
+ general.add_option( "--force", dest="force", help="Force dangerous operation",
+ action="store_true" )
+
+ general.add_option( "--quiet", dest="quiet", help="Supress status messages",
+ action="store_true" )
+ parser.add_option_group(general)
+
+
+ # cmd_load options # # # #
+ load = optparse.OptionGroup(parser, "Load Options")
+ load.add_option( "--skip-missing", dest="skip_missing", help="Skip missing logfiles",
+ action="store_true" )
+
+ load.add_option( "--create", dest="create", help="Create index database",
+ action="store_true" )
+ parser.add_option_group(load)
+
+
+ # cmd_autoload options # # # #
+ autoload = optparse.OptionGroup(parser, "Autoload Options")
+ autoload.add_option( "--autoload-state", dest="autoload_state_path", help="Path to autoload state dir",
+ metavar="PATH", default=config.SEARCH_AUTOINDEX_PATH)
+
+ autoload.add_option( "--from", dest="after", help="Only autoload logfiles from the given date on",
+ metavar="DATE", type="date", action="parse_date", default=None )
+
+ autoload.add_option( "--until", dest="until", help="Only autoload logfiles up to (and including) the given date",
+ metavar="DATE", type="date", action="parse_date", default=None )
+
+ autoload.add_option( "--reload", dest="reload", help="Force reload lines",
+ action="store_true" )
+
+ autoload.add_option( "--reset", dest="reset", help="Reset old autload state",
+ action="store_true" )
+
+ autoload.add_option( "--ignore-resume", dest="ignore_resume", help="Do not try and resume interrupted autoload",
+ action="store_true" )
+ parser.add_option_group(autoload)
+
+ # parse
+ options, args = parser.parse_args(argv[1:])
+
+ # postprocess stuff
+ options._parser = parser
+ options.formatter = config.LOG_FORMATTERS[options.formatter_name](options.timezone, "%H:%M:%S", None, None)
+
+ # special-case --help
+ if options.help :
+ return cmd_help(options, *args)
+
+ # must have at least the command argument
+ if not args :
+ raise CommandError("Missing command")
+
+ # pop command
+ command = args.pop(0)
+
+ # get func
+ func = globals().get('cmd_%s' % command)
+
+ # unknown command?
+ if not func :
+ raise CommandError("Unknown command: %s" % command)
+
+ # call
+ func(options, *args)
+
+if __name__ == '__main__' :
+ try :
+ main(sys.argv)
+ sys.exit(0)
+
+ except CommandError, e :
+ print e
+ sys.exit(1)
+
--- a/channels.py Sun Sep 13 00:49:55 2009 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,38 +0,0 @@
-"""
- Our list of LogChannels
-"""
-
-class ChannelList (object) :
- """
- The list of channels, and related methods
- """
-
-
- def __init__ (self, channel_list) :
- """
- Initialize with the given channel dict
- """
-
- self.channel_list = channel_list
- self.channel_dict = dict((channel.id, channel) for channel in channel_list)
-
- def lookup (self, channel_name) :
- """
- Looks up the LogChannel for the given name
- """
-
- return self.channel_dict[channel_name]
-
- def dict (self) :
- """
- Returns a { name: LogChannel } dict
- """
- return self.channel_dict
-
- def __iter__ (self) :
- """
- Iterate over our defined LogChannel objects
- """
-
- return iter(self.channel_list)
-
--- a/config.py Sun Sep 13 00:49:55 2009 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,112 +0,0 @@
-"""
- Configureable defaults
-"""
-
-import os.path, pytz
-from log_parser import IrssiParser
-from log_channel import LogChannel
-from log_source import LogSourceDecoder, LogDirectory
-from log_formatter import IrssiFormatter, DebugFormatter
-from channels import ChannelList
-import log_formatter
-
-# build relative paths to the dir containing this file
-relpath = lambda path : os.path.join(os.path.dirname(__file__), path)
-
-### ###
-### Configuration ###
-### ###
-
-# timezone to use for logs
-LOG_TIMEZONE = pytz.timezone('Europe/Helsinki')
-
-# timestamp format for logfiles
-LOG_TIMESTAMP_FMT = '%H:%M:%S'
-
-# the decoder used for logfiles
-LOG_DECODER = LogSourceDecoder((
- ('utf-8', 'strict'),
- ('latin-1', 'replace'),
-))
-
-# log filename format
-LOG_FILENAME_FMT = '%Y-%m-%d'
-
-# the log parser that we use
-LOG_PARSER = IrssiParser(LOG_TIMEZONE, LOG_TIMESTAMP_FMT)
-#LOG_PARSER_FULLTS = IrssiParser(LOG_TIMEZONE, '%Y%m%d%H%M%S')
-
-# the statically defined channel list
-LOG_CHANNELS = ChannelList([
- LogChannel('tycoon', "OFTC", "#tycoon",
- LogDirectory(relpath('/home/spbot/irclogs/tycoon'), LOG_TIMEZONE, LOG_PARSER, LOG_DECODER, LOG_FILENAME_FMT)
- ),
-
- LogChannel('openttd', "OFTC", "#openttd",
- LogDirectory(relpath('/home/spbot/irclogs/openttd'), LOG_TIMEZONE, LOG_PARSER, LOG_DECODER, LOG_FILENAME_FMT)
- ),
-
- LogChannel('test', "TEST", "#test",
- LogDirectory(relpath('/home/spbot/irclogs/test'), LOG_TIMEZONE, LOG_PARSER, LOG_DECODER, LOG_FILENAME_FMT)
- )
-])
-
-# URL to the hgweb installation for this code
-HGWEB_URL = "http://hg.qmsk.net/irclogs2"
-
-# path to the mercurial working copy containing this code
-HG_WC_PATH = relpath(".")
-
-# how to handle decode() errors for logfile lines
-LOG_SOURCE_DECODE_ERRORS = 'replace'
-
-# date format for URLs
-URL_DATE_FMT = '%Y-%m-%d'
-
-# month name format
-MONTH_FMT = '%B %Y'
-
-# timezone name format
-TIMEZONE_FMT = '%Z %z'
-
-# TTF fonts to use for drawing images
-FORMATTER_IMAGE_FONTS = {
- # XXX: no unicode support
- # 'default': (None, "Ugly default font" ),
- 'ttf-dejavu-mono': ("/usr/share/fonts/truetype/ttf-dejavu/DejaVuSansMono.ttf", "DejaVu Sans Mono" ),
- 'ttf-liberation-mono': ("/usr/share/fonts/truetype/ttf-liberation/LiberationMono-Regular.ttf", "Liberation Mono Regular" )
-}
-
-# available formatters
-LOG_FORMATTERS = {
- 'irssi': IrssiFormatter,
- 'debug': DebugFormatter,
-}
-
-# Cookie settings
-PREF_COOKIE_PATH = '/'
-PREF_COOKIE_EXPIRE_SECONDS = 1 * 365 * 24 * 60 * 60 # one year
-
-# default preferences
-PREF_TIME_FMT_DEFAULT = '%H:%M:%S'
-PREF_DATE_FMT_DEFAULT = '%Y-%m-%d'
-PREF_TIMEZONE_FALLBACK = pytz.utc
-PREF_FORMATTER_DEFAULT = IrssiFormatter
-PREF_COUNT_DEFAULT = 200
-PREF_COUNT_MAX = None
-PREF_IMAGE_FONT_DEFAULT = 'ttf-dejavu-mono'
-PREF_IMAGE_FONT_SIZE_DEFAULT = 12
-PREF_IMAGE_FONT_SIZE_MAX = 32
-
-# search line count options
-SEARCH_LINE_COUNT_OPTIONS = (
- (50, 50),
- (100, 100),
- (200, 200),
- (None, "∞"),
-)
-
-# search index database path
-SEARCH_INDEX_PATH = '/home/spbot/irclogs/search-index'
-SEARCH_AUTOINDEX_PATH = '/home/spbot/irclogs/search-autoindex'
-
--- a/error.py Sun Sep 13 00:49:55 2009 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,183 +0,0 @@
-"""
- Build error messages
-"""
-
-import traceback, sys, cgi, urllib
-
-def truncate (msg, limit) :
- """
- Truncate the given message to <limit> chars
- """
-
- if len(msg) > limit :
- return msg[:limit-3] + '...'
-
- else :
- return msg
-
-def build_link (title, url) :
- return '<a href="%s">%s</a>' % (cgi.escape(url, True), cgi.escape(title))
-
-def build_error (exc_info=None, env=None) :
- """
- Dumps out a raw traceback of the given/current exception to stdout.
-
- If request_env, it should be a environment dict, like under WSGI, and will be used to display additional info
- about the request.
-
- Returns a (status, content-type, body) tuple, with all components being non-unicode strs.
- """
-
- # default for exc_info is current exception
- if not exc_info :
- exc_info = sys.exc_info()
-
- # request URL?
- if env :
- try :
- from qmsk.web.http import request_url
-
- url = request_url(env)
-
- except :
- # ignore
- url = None
- else :
- url = None
-
- # working copy path?
- try :
- from config import HG_WC_PATH, HGWEB_URL
-
- wc_path = HG_WC_PATH
- hgweb_url = HGWEB_URL
-
- except :
- # a good guess
- wc_path = '.'
- hgweb_url = None
-
- # version?
- try :
- from version import version_string, version_link_hg
-
- version = version_string(wc_path)
-
- if hgweb_url :
- version_href = version_link_hg(hgweb_url, wc_path)
-
- else :
- version_href = None
-
- except :
- version = None
- version_href = None
-
- # the exception type
- exception_str = traceback.format_exception_only(*exc_info[:2])[-1]
-
- # the exception traceback
- traceback_lines = traceback.format_exception(*exc_info)
-
- # XXX: make this configureable
- trac_url = "http://projects.qmsk.net/irclogs2/trac"
-
- # ticket list
- trac_query = build_link("All tickets", "%s/query" % trac_url)
-
- # submit ticket
- submit_args = dict(type='defect')
-
- # handle optional components
- if url :
- submit_args['url'] = url
- trac_query_url = build_link("Same URL", "%s/query?url=%s" % (trac_url, urllib.quote(url)))
- else :
- trac_query_url = ""
-
- if version :
- submit_args['revision'] = version
- trac_query_version = build_link("Same version", "%s/query?revision=%s" % (trac_url, urllib.quote(version)))
-
- else :
- trac_query_version = ""
-
- if exception_str :
- submit_args['summary'] = truncate(exception_str, 140)
- trac_query_err = build_link("Same error", "%s/query?summary=%s" % (trac_url, urllib.quote(exception_str.rstrip())))
-
- else :
- trac_query_err = ""
-
- if traceback_lines :
- # this is big
- submit_args['description'] = """\
-[Insert any additional information here]
-
-
-= Traceback =
-{{{
-%s
-}}}""" % ''.join(traceback_lines)
-
- # the trac newticket URL
- submit_url = "%s/newticket?%s" % (trac_url, '&'.join('%s=%s' % (urllib.quote(k), urllib.quote(v)) for k, v in submit_args.iteritems()))
-
- # return
- return ('500 Internal Server Error', 'text/html; charset=UTF-8', ("""\
-<html><head><title>500 Internal Server Error</title></head><body>
-<h1>Oops!</h1>
-<p>
- An error occured, which was not logged, and was not reported to anybody. It might be your fault, or it might be mine.
-</p>
-
-<p>
- You can try:
- <ol style="list-style-type: lower-alpha">
- <li><strong>Poking</strong> the administrator of this site to see if they respond</li>
- <li><strong>Looking</strong> for similar issue tickets with:
- <ul>
- <li>%(trac_query)s</li>
- <li>%(trac_query_url)s</li>
- <li>%(trac_query_version)s</li>
- <li>%(trac_query_err)s</li>
- </ul>
- </li>
- <li><strong>Submitting</strong> a new ticket using the following link (quick & easy):</li>
- </ol>
-</p>
-<pre>
- <a href="%(submit_url)s">%(submit_url_short)s</a>
-</pre>
-
-<h2>Details:</h2>
-<p>The page you tried to request was:</p>
-<pre>
- %(url)s
-</pre>
-
-<p>The software version is:</p>
-<pre>
- %(version_link)s
-</pre>
-
-<p>The error was:</p>
-<pre>
- %(exception)s
-</pre>
-
-<p>The traceback was:</p>
-<pre>%(traceback)s</pre>
-</body></html>""" % dict(
- url = url if url else 'Unknown',
- version_link = version_href if version_href else 'Unknown',
- exception = truncate(exception_str, 512),
- traceback = cgi.escape(''.join(' ' + line for line in traceback_lines)),
- trac_query = trac_query,
- trac_query_url = trac_query_url,
- trac_query_version = trac_query_version,
- trac_query_err = trac_query_err,
- submit_url = submit_url,
- submit_url_short = truncate(submit_url, 120)
- )).encode('utf-8'))
-
--- a/handlers.py Sun Sep 13 00:49:55 2009 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,292 +0,0 @@
-"""
- Our URL action handlers
-"""
-
-import datetime, calendar, pytz
-
-from qmsk.web import http, template
-
-import urls, channels, helpers
-import preferences as prefs
-from preferences import preferences
-import config, log_search
-
-# load templates from here
-templates = template.TemplateLoader("templates",
- _helper_class = helpers.Helpers,
- urls = urls,
- channel_list = config.LOG_CHANNELS,
- config = config,
-)
-
-# return a http.Response for the given text in the given format
-def _render_type (request, channel, lines, type, full_timestamps=False) :
- """
- Render the given LogLines as a http.Response in the given format, which is one of:
- html - XXX: not supported
- txt - Plaintext
- png - PNG image
- rss - RSS feed
- """
-
- # load related preferences
- formatter = request.prefs['formatter']
-
- kwargs = dict(
- full_timestamps = full_timestamps
- )
-
- # we can render in various modes...
- if type in ('html', None) :
- xxx
-
- elif type == 'txt' :
- # plaintext
- lines = formatter.format_txt(lines, **kwargs)
-
- # build data
- data = '\n'.join(data for line, data in lines)
-
- return http.Response(data, 'text/plain')
-
- elif type == 'png' :
- # PNG image
- png_data = formatter.format_png(lines, **kwargs)
-
- return http.Response(png_data, 'image/png', charset=None)
-
- elif type == 'rss' :
- # RSS feed
- rss_data = formatter.format_rss(lines, **kwargs)
-
- # XXX: fix to render as unicode?
- return http.Response(rss_data, 'application/rss+xml', charset=None)
-
- else :
- raise http.ResponseError("Unrecognized type: %r" % (type, ))
-
-def _render_date (request, channel, date, lines, type, count, page, max) :
- """
- Render the given LogLines as a http.Response for channel_date
- """
-
- # type?
- if type :
- # special type
- return _render_type(request, channel, lines, type)
-
- else :
- # format HTML
- lines = request.prefs['formatter'].format_html(lines)
-
- # render
- return templates.render_to_response("channel_date",
- req = request,
- prefs = request.prefs,
- channel = channel,
- date = date,
- count = count,
- page = page,
- max = max,
- lines = lines,
-
- # for prev/next date
- date_next = channel.source.get_next_date(date),
- date_prev = channel.source.get_prev_date(date),
- )
-
-@preferences.handler()
-def index (request) :
- """
- The topmost index page, display a list of available channels, perhaps some general stats
- """
-
- return templates.render_to_response("index",
- req = request,
- prefs = request.prefs,
- )
-
-# XXX: fix this namespace crap
-@preferences.handler()
-def preferences_ (request) :
- """
- Preferences editor
- """
-
- # POST?
- if request.is_post() :
- # update any modified preferences
- for pref in preferences.pref_list :
- # get the POST'd value, default = None
- post_value = request.get_post(pref.name, None)
-
- # skip non-specified values
- # XXX: this is to not clobber timezone_offset to None
- if post_value is None :
- continue
-
- # parse the POST'd value, None -> default
- new_value = request.prefs.parse(pref, post_value)
-
- # update if given and changed
- if new_value != request.prefs[pref] :
- request.prefs.set(pref.name, new_value)
-
- # render
- return templates.render_to_response("preferences",
- req = request,
- prefs = request.prefs,
- preferences = prefs,
- )
-
-def channel_select (request, channel) :
- """
- Redirect to the appropriate channel_view
- """
-
- return http.Redirect(urls.channel.build(request, channel=channel))
-
-@preferences.handler(prefs.formatter)
-def channel_last (request, channel, count, formatter, type=None) :
- """
- The main channel view page, displaying the most recent lines
- """
-
- # get latest events
- lines = channel.source.get_latest(count)
-
- # type?
- if type :
- # other format
- return _render_type(request, channel, lines, type)
-
- else :
- # format HTML
- lines = formatter.format_html(lines)
-
- # render page
- return templates.render_to_response("channel_last",
- req = request,
- prefs = request.prefs,
- channel = channel,
- count = count,
- lines = lines,
- )
-
-@preferences.handler(prefs.formatter, prefs.timezone, prefs.count)
-def channel_link (request, channel, timestamp, formatter, timezone, count, type=None) :
- """
- Display channel_date for specific UTC timestamp
- """
-
- # convert timestamp to user's timezone
- timestamp = timestamp.astimezone(timezone)
-
- # get correct day's correct page of lines
- page, max, lines = channel.source.get_date_paged(timestamp, count)
-
- # render channel_date
- return _render_date (request, channel, timestamp, lines, type, count, page, max)
-
-@preferences.handler(prefs.timezone)
-def channel_calendar (request, channel, year, month, timezone) :
- """
- Display a list of avilable logs for some month
- """
-
- # current date as default
- now = timezone.localize(datetime.datetime.now())
-
- # target year/month
- target = timezone.localize(datetime.datetime(
- year = year if year else now.year,
- month = month if month else now.month,
- day = 1
- ))
-
- # display calendar
- return templates.render_to_response("channel_calendar",
- req = request,
- prefs = request.prefs,
- channel = channel,
- month = target,
- )
-
-@preferences.handler(prefs.count, prefs.timezone)
-def channel_date (request, channel, date, count, timezone, page=1, type=None) :
- """
- Display all log data for the given date
- """
-
- # convert date to user's timezone
- date = timezone.localize(date)
-
-# print
-# print "channel_date: date=%s" % date
-
- # get that day's events, either paged or not
- if page :
- page, max, lines = channel.source.get_date_paged(date, count, page)
-
- else :
- lines = channel.source.get_date(date)
- max = None
-
- # render channel_date
- return _render_date (request, channel, date, lines, type, count, page, max)
-
-@preferences.handler(prefs.formatter, prefs.count)
-def channel_search (request, channel, formatter, count, q=None, page=1, max=1, type=None, t=None) :
- """
- Display the search form for the channel for GET, or do the search for POST.
- """
-
- # calculate skip offset from page/count
- skip = (page - 1) * count
-
- # got a search query?
- if q :
- # attribute targets
- targets = dict(('search_%s' % target, True) for target in t if target in ('msg', 'nick')) if t else {}
-
- try :
- # do search
- lines = log_search.get_index().search_simple(channel, q, count, skip, **targets)
-
- # update max?
- if max and page > max :
- max = page
-
- except log_search.NoResultsFound :
- # no results
- lines = None
-
- else :
- # just display the search form
- lines = None
-
- # type?
- if type and lines :
- # special type
- return _render_type(request, channel, lines, type, full_timestamps=True)
-
- else :
- # format lines to HTML if any
- if lines :
- # format
- lines = formatter.format_html(lines, full_timestamps=True)
-
- # render page
- return templates.render_to_response("channel_search",
- req = request,
- prefs = request.prefs,
- channel = channel,
- search_query = q,
- search_targets = t,
- count = count,
- page = page,
- skip = skip,
- max = max,
- lines = lines,
- )
-
--- a/helpers.py Sun Sep 13 00:49:55 2009 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,243 +0,0 @@
-"""
- Some additional helpers
-"""
-
-import datetime
-import calendar as _calendar
-
-import qmsk.web.helpers
-
-import preferences, urls, config, version
-
-class Helpers (qmsk.web.helpers.Helpers) :
- """
- Our set of helpers, inheriting from base helpers
- """
-
- # set contructor...
- set = set
-
- # reference to calendar instance
- calendar = _calendar.Calendar()
-
- # list of (month_num, month_name) for the months in the year
- months = list(enumerate(_calendar.month_name))[1:]
-
- def version_link (self) :
- """
- Returns a <a href> representing this version of the software
- """
-
- return version.version_link_hg(config.HGWEB_URL, config.HG_WC_PATH)
-
- def tz_name (self, tz) :
- """
- Returns a string describing the given timezone
- """
-
- return self.now().strftime(config.TIMEZONE_FMT)
-
- def fmt_month (self, date) :
- """
- Formats a month
- """
-
- return date.strftime(config.MONTH_FMT)
-
- def fmt_weekday (self, wday) :
- """
- Formats an abbreviated weekday name
- """
-
- return _calendar.day_abbr[wday]
-
- def build_date (self, month, mday) :
- """
- Returns a datetime.datetime for the given (month.year, month.month, mday)
- """
-
- return self.ctx['prefs'][preferences.timezone].localize(datetime.datetime(month.year, month.month, mday))
-
- def now (self) :
- """
- Build current time
- """
-
- return self.ctx['prefs'][preferences.timezone].localize(datetime.datetime.now())
-
- def today (self) :
- """
- Build today's date
- """
-
- return self.now().date()
-
- def is_today (self, dt) :
- """
- Checks if the given datetime.datetime is today
- """
-
- # compare with current date
- return dt.date() == self.today()
-
- def is_this_month (self, month) :
- """
- Checks the given month is the current month
- """
-
- today = self.today()
-
- return (month.year == today.year and month.month == today.month)
-
- @staticmethod
- def _wrap_year (year, month) :
- """
- Wraps month to between [1, 12], spilling overflow/underflow by to year.
-
- Returns (year, month)
- """
-
- # underflow?
- if month == 0 :
- # wrap to previous year
- return (year - 1, 12)
-
- # overflow?
- elif month == 13 :
- # wrap to next year
- return (year + 1, 1)
-
- # sane value
- elif 1 <= month <= 12 :
- return (year, month)
-
- # insane value
- else :
- assert False, "invalid year/month: %d/%d" % (year, month)
-
- def prev_month (self, month) :
- """
- Returns the month preceding the given one (as a datetime.datetime)
- """
-
- # previous month
- y, m = self._wrap_year(month.year, month.month - 1)
-
- # build datetime
- return datetime.datetime(year=y, month=m, day=1, tzinfo=month.tzinfo)
-
- def next_month (self, month) :
- """
- Returns the month following the given one (as a datetime.datetime)
- """
-
- # previous month
- y, m = self._wrap_year(month.year, month.month + 1)
-
- # build datetime
- return datetime.datetime(year=y, month=m, day=1, tzinfo=month.tzinfo)
-
- def fmt_time (self, time=None) :
- """
- Format given time, or current time
- """
-
- # defaults
- if not time :
- time = self.now()
-
- return time.strftime(self.ctx['prefs'][preferences.time_format])
-
- def fmt_date (self, date=None) :
- """
- Format given date, or current date
- """
-
- # defaults
- if not date :
- date = self.now()
-
- return date.strftime(self.ctx['prefs'][preferences.date_format])
-
- def url (self, url, **params) :
- """
- Build URL with our request object
- """
-
- return url.build(self.ctx['req'], **params)
-
- # old name
- build_url = url
-
- def utc_timestamp (self, dtz) :
- """
- Build an UTC timestamp from the given datetime
- """
-
- return urls.types['ts'].build(dtz)
-
- def skip_next (self, count, skip) :
- """
- Return skip offset for next page
- """
-
- return count + skip
-
- def skip_page (self, count, page) :
- """
- Skip to page
- """
-
- if page :
- return count * page
-
- else :
- return None
-
- def skip_prev (self, count, skip) :
- """
- Return skip offset for previous page, None for first page
- """
-
- if skip > count :
- return skip - count
-
- else :
- return None
-
- def max (self, *values) :
- """
- Returns the largest of the given values
- """
-
- return max(values)
-
- def select_options (self, key_values, selected_key=None) :
- """
- Render a series of <option> tags for <select>.
-
- The given key_values is an iterable of (key, value) pairs, key may be None if it's the same as value.
- """
-
- return '\n'.join(
- '\t<option%s%s>%s</option>' % (
- ' value="%s"' % key if key is not None else '',
- ' selected="selected"' if (key if key is not None else value) == selected_key else '',
- value
- ) for key, value in key_values
- )
-
- def prev_date (self, date) :
- """
- Returns the previous date for the given datetime-date
- """
-
- return datetime.datetime(date.year, date.month, date.day, tzinfo=date.tzinfo) - datetime.timedelta(days=1)
-
- def next_date (self, date) :
- """
- Returns the previous date for the given datetime-date
- """
-
- return datetime.datetime(date.year, date.month, date.day, tzinfo=date.tzinfo) + datetime.timedelta(days=1)
-
--- a/index.cgi Sun Sep 13 00:49:55 2009 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,47 +0,0 @@
-#!/usr/bin/python2.5
-
-"""
- CGI mode using qmsk.web.cgi
-"""
-
-def error () :
- """
- Dumps out a raw traceback of the current exception to stdout, call from except.
-
- Used for low-level ImportError's
- """
-
- # if this import fails, we're doomed
- import sys, error
-
- # format info
- status, content_type, body = error.build_error()
-
- # HTTP headers+body
- sys.stdout.write('Status: %s\r\n' % status)
- sys.stdout.write('Content-type: %s\r\n' % content_type)
- sys.stdout.write('\r\n')
- sys.stdout.write(body)
-
-def main () :
- """
- Build our wsgi.Application and run
- """
-
- try :
- from qmsk.web import cgi_main
- import wsgi
-
- # create app
- app = wsgi.Application()
-
- # run once
- cgi_main.run(app)
-
- except :
- # display error on stdout
- error()
-
-if __name__ == '__main__' :
- main()
-
--- a/index.fcgi Sun Sep 13 00:49:55 2009 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,26 +0,0 @@
-#!/usr/bin/python2.5
-# :set filetype=py
-
-"""
- FastCGI mode using qmsk.web.fastcgi_main
-"""
-
-from qmsk.web import fastcgi_main
-
-# XXX: error handling for imports? Lighttp sucks hard at this
-import wsgi
-
-def main () :
- """
- Build our WSGIApplication and run
- """
-
- # create app
- app = wsgi.Application()
-
- # run once
- fastcgi_main.run(app)
-
-if __name__ == '__main__' :
- main()
-
--- a/log_channel.py Sun Sep 13 00:49:55 2009 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,54 +0,0 @@
-"""
- A channel represents a series of log events, stored in some log source
-"""
-
-import log_search
-
-class LogChannel (object) :
- """
- A single IRC channel, logged to some specific place
- """
-
- def __init__ (self, id, network, name, source) :
- """
- Initialize this channel from the given identifier key, network name, channel name, and LogSource
- """
-
- # store
- self.id = id
- self.network = network
- self.name = name
- self.source = source
-
- # bind source
- self.source.bind_channel(self)
-
- @property
- def title (self) :
- """
- Title is 'Network - #channel'
- """
-
- return "%s - %s" % (self.network, self.name)
-
- def search (self, query) :
- """
- Perform a search on this channel, returning a sequence of LogLines
- """
-
- return log_search.index.search_simple(self, query)
-
- def __str__ (self) :
- """
- Returns self.title
- """
-
- return self.title
-
- def __repr__ (self) :
- """
- Uses self.id
- """
-
- return "LogChannel(%s)" % (self.id, )
-
--- a/log_formatter.py Sun Sep 13 00:49:55 2009 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,257 +0,0 @@
-"""
- Format LogLines into some other representation
-"""
-
-import re, xml.sax.saxutils
-
-from log_line import LogTypes
-from log_formatter_pil import PILImageFormatter
-from log_formatter_rss import RSSFormatter
-
-class LogFormatter (object) :
- """
- Provides a method to format series of LogLines into various output formats, with varying themes.
- """
-
- # machine-readable name
- name = None
-
- # human-readable name
- title = None
-
- ## parameters
- # use a fixed-width font for HTML output
- html_fixedwidth = True
-
- def __init__ (self, tz, timestamp_fmt, img_ttf_path, img_font_size) :
- """
- Initialize to format timestamps with the given timezone and timestamp.
-
- Use the given TTF font to render image text with the given size, if given, otherwise, a default one.
- """
-
- # store
- self.tz = tz
- self.timestamp_fmt = timestamp_fmt
- self.img_ttf_path = img_ttf_path
- self.img_font_size = img_font_size
-
- # XXX: harcoded
- self.date_fmt = '%Y-%m-%d'
-
- def _format_line_text (self, line, template_dict, type=None, full_timestamp=False, **extra) :
- """
- Format the given line as text, using the given { type: string template } dict.
-
- If type is given, then it overrides line.type
-
- Any additional keyword args will also be available for the template to use
- """
-
- # default type?
- if type is None :
- type = line.type
-
- # look up the template
- if type in template_dict :
- template = template_dict[type]
-
- else :
- raise Exception("Format template not defined for type: %s" % LogTypes.name_from_code(type))
-
- # convert timestamp into display timezone
- dtz = line.timestamp.astimezone(self.tz)
-
- # full timestamps?
- if full_timestamp :
- # XXX: let the user define a 'datetime' format instead?
- timestamp_fmt = self.date_fmt + ' ' + self.timestamp_fmt
-
- else :
- timestamp_fmt = self.timestamp_fmt
-
- # breakdown source
- source_nickname, source_username, source_hostname, source_chanflag = line.source
- target_nickname = line.target
-
- # format with dict
- return template % dict(
- channel_name = line.channel.name,
- datetime = dtz.strftime('%a %b %d %H:%M:%S %Y'),
- date = dtz.strftime(self.date_fmt),
- timestamp = dtz.strftime(timestamp_fmt),
- source_nickname = source_nickname,
- source_username = source_username,
- source_hostname = source_hostname,
- source_chanflag = source_chanflag,
- target_nickname = target_nickname,
- message = line.data,
- **extra
- )
-
- def format_txt (self, lines, full_timestamps=False) :
- """
- Format given lines as plaintext.
-
- If full_timestamps is given, the output will contain full timestamps with both date and time.
-
- No trailing newlines.
- """
-
- abstract
-
- def format_html (self, lines, full_timestamps=False) :
- """
- Format as HTML.
-
- See format_txt for information about arguments
- """
-
- abstract
-
- def format_png (self, lines, full_timestamps=False) :
- """
- Format as a PNG image, returning the binary PNG data
- """
-
- abstract
-
- def format_rss (self, lines, full_timestamps=False) :
- """
- Format as an XML RSS document
- """
-
- abstract
-
-class BaseHTMLFormatter (LogFormatter) :
- """
- Implements some HTML-formatting utils
- """
-
- # parameters
- html_fixedwidth = True
-
- # regexp to match URLs
- URL_REGEXP = re.compile(r"http://\S+")
-
- def _process_links (self, line) :
- """
- Processed the rendered line, adding in <a href>'s for things that look like URLs, returning the new line.
-
- The line should already be escaped
- """
-
- def _encode_url (match) :
- # encode URL
- url_html = match.group(0)
- url_link = xml.sax.saxutils.unescape(url_html)
-
- return '<a href="%(url_link)s">%(url_html)s</a>' % dict(url_link=url_link, url_html=url_html)
-
- return self.URL_REGEXP.sub(_encode_url, line)
-
- def format_html (self, lines, **kwargs) :
- """
- Just uses format_txt, but processes links, etc
- """
-
- # format using IrssiTextFormatter
- for line, txt in self.format_txt(lines, **kwargs) :
- # escape HTML
- html = xml.sax.saxutils.escape(txt)
-
- # process links
- html = self._process_links(html)
-
- # yield
- yield line, html
-
-
-class IrssiTextFormatter (RSSFormatter, PILImageFormatter, LogFormatter) :
- """
- Implements format_txt for irssi-style output
- """
-
- # format definitions by type
- __FMT = {
- LogTypes.RAW : "%(timestamp)s %(data)s",
- LogTypes.LOG_OPEN : "--- Log opened %(datetime)s",
- LogTypes.LOG_CLOSE : "--- Log closed %(datetime)s",
- 'DAY_CHANGED' : "--- Day changed %(date)s",
-
- LogTypes.MSG : "%(timestamp)s <%(source_chanflag)s%(source_nickname)s> %(message)s",
- LogTypes.NOTICE : "%(timestamp)s -%(source_nickname)s- %(message)s",
- LogTypes.ACTION : "%(timestamp)s * %(source_nickname)s %(message)s",
-
- LogTypes.JOIN : "%(timestamp)s -!- %(source_nickname)s [%(source_username)s@%(source_hostname)s] has joined %(channel_name)s",
- LogTypes.PART : "%(timestamp)s -!- %(source_nickname)s [%(source_username)s@%(source_hostname)s] has left %(channel_name)s [%(message)s]",
- LogTypes.KICK : "%(timestamp)s -!- %(target_nickname)s was kicked from %(channel_name)s by %(source_nickname)s [%(message)s]",
- LogTypes.MODE : "%(timestamp)s -!- mode/%(channel_name)s [%(message)s] by %(source_nickname)s",
-
- LogTypes.NICK : "%(timestamp)s -!- %(source_nickname)s is now known as %(target_nickname)s",
- LogTypes.QUIT : "%(timestamp)s -!- %(source_nickname)s [%(source_username)s@%(source_hostname)s] has quit [%(message)s]",
-
- LogTypes.TOPIC : "%(timestamp)s -!- %(source_nickname)s changed the topic of %(channel_name)s to: %(message)s",
- 'TOPIC_UNSET' : "%(timestamp)s -!- Topic unset by %(source_nickname)s on %(channel_name)s",
-
- LogTypes.SELF_NOTICE: "%(timestamp)s -%(source_nickname)s- %(message)s",
- LogTypes.SELF_NICK : "%(timestamp)s -!- %(source_nickname)s is now known as %(target_nickname)s",
-
- LogTypes.NETSPLIT_START :
- "%(timestamp)s -!- Netsplit %(source_hostname)s <-> %(target_nickname)s quits: %(_netsplit_targets)s",
- LogTypes.NETSPLIT_END :
- "%(timestamp)s -!- Netsplit over, joins: %(_netsplit_targets)s",
- }
-
- def format_txt (self, lines, full_timestamps=False) :
- # ...handle each line
- for line in lines :
- # extra args
- extra = {}
-
- # default to line.type
- type = line.type
-
- # special formatting for unset-Topic
- if line.type == LogTypes.TOPIC and line.data is None :
- type = 'TOPIC_UNSET'
-
- # format netsplit stuff
- elif line.type & LogTypes._NETSPLIT_MASK :
- # format the netsplit-targets stuff
- extra['_netsplit_targets'] = line.data
-
- # using __TYPES
- yield line, self._format_line_text(line, self.__FMT, type, full_timestamps, **extra)
-
-class IrssiFormatter (BaseHTMLFormatter, IrssiTextFormatter) :
- """
- Implements plain black-and-white irssi-style formatting
- """
-
- # name
- name = 'irssi'
- title = "Irssi (plain)"
-
-class DebugFormatter (BaseHTMLFormatter) :
- """
- Implements a raw debug-style formatting of LogLines
- """
-
- # name
- name = 'debug'
- title = "Raw debugging format"
-
- def format_txt (self, lines, full_timestamps=False) :
- # iterate
- for line in lines :
- # just dump
- yield line, unicode(line)
-
-def by_name (name) :
- """
- Lookup and return a class LogFormatter by name
- """
-
- return FORMATTERS[name]
-
--- a/log_formatter_pil.py Sun Sep 13 00:49:55 2009 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,81 +0,0 @@
-"""
- Use of PIL to render the image formatting stuff
-"""
-
-from PIL import Image, ImageDraw, ImageFont
-
-from cStringIO import StringIO
-
-class PILImageFormatter (object) :
- """
- Mixin for LogFormatter that implements the basic image-rendering operations on top of format_txt
- """
-
- # the font we load
- font = None
-
- # line spacing in pixels
- LINE_SPACING = 1
-
- def _load_font (self) :
- """
- Use the configured img_ttf_path for a TrueType font, or a default one
- """
-
- if self.font :
- pass
-
- elif self.img_ttf_path :
- # load truetype with configured size
- self.font = ImageFont.truetype(self.img_ttf_path, self.img_font_size)
-
- else :
- # default
- self.font = ImageFont.load_default()
-
- return self.font
-
- def format_png (self, lines, **kwargs) :
- """
- Build and return a PNG image of the given lines, using format_txt
- """
-
- # load font
- font = self._load_font()
-
- # build list of plain-text line data
- lines = list(data for line, data in self.format_txt(lines, **kwargs))
-
- # lines sizes
- line_sizes = [font.getsize(line) for line in lines]
-
- # figure out how wide/high the image will be
- width = max(width for width, height in line_sizes)
- height = sum(height + self.LINE_SPACING for width, height in line_sizes)
-
- # create new B/W image
- img = Image.new('L', (width, height), 0xff)
-
- # drawer
- draw = ImageDraw.Draw(img)
-
- # starting offset
- offset_y = 0
-
- # draw the lines
- for line, (width, height) in zip(lines, line_sizes) :
- # draw
- draw.text((0, offset_y), line, font=font)
-
- # next offset
- offset_y += height + self.LINE_SPACING
-
- # output buffer
- buf = StringIO()
-
- # save
- img.save(buf, 'png')
-
- # return data
- return buf.getvalue()
-
--- a/log_formatter_rss.py Sun Sep 13 00:49:55 2009 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,41 +0,0 @@
-"""
- Uses PyRSS2Gen to generate XML RSS documents
-"""
-
-import PyRSS2Gen as RSS2Gen
-import datetime, pytz
-
-class RSSFormatter (object) :
- """
- Mixin for LogFormatter that implements the basic RSS-rendering stuff on top of format_html
- """
-
- def format_rss (self, lines, **kwargs) :
- """
- Process using format_html
- """
-
- # build the RSS2 object and return the XML
- return RSS2Gen.RSS2(
- title = "IRC RSS feed",
- link = "http://irclogs.qmsk.net/",
- description = "A stupid RSS feed that nobody sane would ever use",
-
- # XXX: GMT
- lastBuildDate = datetime.datetime.utcnow(),
-
- items = [
- RSS2Gen.RSSItem(
- # use the formatted HTML data as the title
- title = html_data,
-
- # timestamp
- pubDate = line.timestamp.astimezone(pytz.utc),
-
- # link
- link = "http://xxx/",
-
- ) for line, html_data in self.format_html(lines, **kwargs)
- ]
- ).to_xml('utf8')
-
--- a/log_line.py Sun Sep 13 00:49:55 2009 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,186 +0,0 @@
-"""
- An IRC logfile consists of a series of lines/events
-"""
-
-class LogTypes :
- """
- Definitions of the various LogLines types:
-
- LogTypes.RAW
- LogTypes.LOG_OPEN
- LogTypes.LOG_CLOSE
-
- LogTypes.MSG
- LogTypes.NOTICE
- LogTypes.ACTION
-
- LogTypes.JOIN
- LogTypes.PART
- LogTypes.KICK
- LogTypes.MODE
-
- LogTypes.NICK
- LogTypes.QUIT
-
- LogTypes.TOPIC
-
- LogTypes.SELF_NOTICE
- LogTypes.SELF_NICK
- """
-
- # list of LogType values by name
- LIST = [
- ## special
- # unknown type, may or may not have a timestamp, no source, only data
- ('RAW', 0x01),
-
- # log opened
- ('LOG_OPEN', 0x02),
-
- # log closed
- ('LOG_CLOSE', 0x03),
-
- ## messages
- # <source> sent message <data> to <channel>
- ('MSG', 0x10),
-
- # <source> sent notice with message <data> to <channel>
- ('NOTICE', 0x11),
-
- # <source> sent CTCP action with message <data> to <channel>
- ('ACTION', 0x12),
-
- ## user-channel stats
- # <source> joined <channel>
- ('JOIN', 0x21),
-
- # <source> left <channel> with message <data>
- ('PART', 0x22),
-
- # <source> kicked <target> from <channel> with message <data>
- ('KICK', 0x25),
-
- # <source> changed modes on <channel> with modestring <data>
- ('MODE', 0x26),
-
- ## user status
- # <source> changed nickname to <target>
- ('NICK', 0x31),
-
- # <source> quit the network with quit-message <data>
- ('QUIT', 0x32),
-
- ## general channel status
- # <source> changed the topic of <channel> to <data>
- # data may be None if the topic was unset
- ('TOPIC', 0x41),
-
- ## our own actions
- # we (<source>) sent a notice with message <data> to <channel>
- ('SELF_NOTICE', 0x51),
-
- # we (<source>) changed nickname to <target>
- ('SELF_NICK', 0x52),
-
- ## slightly weirder bits
- # netsplit between <source_hostname> and <target_hostname>, <data> is a space-separated list of <chanflags><nickname>s affected
- # the last item in the list of nicknames may also be of the form "+<count>", where count is the number of additional, but hidden, nicknames affected
- ('NETSPLIT_START', 0x61),
-
- # netsplit over, <data> is a list of users affected, see NETSPLIT_START
- ('NETSPLIT_END', 0x062),
- ]
-
- @classmethod
- def name_from_code (cls, code) :
- """
- Looks up a LogType name by code
- """
-
- return dict((type, name) for name, type in cls.LIST)[code]
-
-# apply as attributes
-for name, code in LogTypes.LIST :
- setattr(LogTypes, name, code)
-
-# masks
-LogTypes._NETSPLIT_MASK = 0x60
-
-class LogLine (object) :
- """
- An event on some specific channel
- """
-
- # the LogChannel
- channel = None
-
- # the offset, only garunteed to be unique for a specific channel and date
- offset = None
-
- # the event type, as defiend in LogTypes
- type = None
-
- # the UTC timestamp of the event
- timestamp = None
-
- # the source, this should be a (nickname, username, hostname, chanflags) tuple
- source = None
-
- # possible target nickname for certain types (kick, nick)
- target = None
-
- # associated data (message, etc)
- data = None
-
- def __init__ (self, channel, offset, type, timestamp, source, target, data) :
- """
- Initialize with given values
- """
-
- self.channel = channel
- self.offset = offset
- self.type = type
- self.timestamp = timestamp
- self.source = source
- self.target = target
- self.data = data
-
- def format_type (self) :
- """
- Formats type as a string code
- """
-
- return LogTypes.name_from_code(self.type)
-
- def format_source (self) :
- """
- Formats source as [<chanflags>][<nickname>][!<username>][@<hostname>], omitting those parts that are missing.
-
- If all parts are None, this returns the empty string
- """
-
- nick, user, host, flags = self.source
-
- return "%s%s%s%s" % (
- flags if flags and flags != ' ' else '',
- nick if nick else '',
- '!' + user if user else '',
- '@' + host if host else ''
- )
-
- def __unicode__ (self) :
- return '\t'.join((
- self.channel.name,
- str(self.offset),
- self.format_type(),
- str(self.timestamp),
- self.format_source(),
- str(self.target),
- unicode(self.data)
- ))
-
- def __repr__ (self) :
- return "LogLine(%r, %s, %-12s, %s, %-35s, %-10s, %r)" % (
- self.channel, self.offset, self.format_type(), self.timestamp, self.format_source(), self.target, self.data
- )
-
--- a/log_parser.py Sun Sep 13 00:49:55 2009 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,233 +0,0 @@
-"""
- Parse log data into log_events
-"""
-
-import re
-import datetime
-
-from log_line import LogTypes, LogLine
-
-class LogParseError (Exception) :
- """
- Parsing some line failed
- """
-
- def __init__ (self, line, offset, message) :
- super(LogParseError, self).__init__("%r@%s: %s" % (line, offset, message))
-
-class LogParser (object) :
- """
- Abstract interface
- """
-
- def __init__ (self, tz, timestamp_fmt="%H:%M:%S") :
- """
- Setup the parser to use the given format for line timestamps, which are of the given timezone
- """
-
- self.tz = tz
- self.timestamp_fmt = timestamp_fmt
-
- def parse_lines (self, channel, lines, date=None, starting_offset=None) :
- """
- Parse the given (iterable) lines of unicode text into a LogEvent, no trailing newline.
-
- Channel is the LogChannel that these lines belong to.
-
- Offset is the starting offset, and may be None to not use it.
-
- Giving date lets the parser build full timestamps, otherwise, unless line timestamps have full date
- information, event timestamps will have a date component of 1900/1/1.
- """
-
- abstract
-
-class IrssiParser (LogParser) :
- """
- A parser for irssi logfiles
- """
-
- # timestamp prefix, with trailing space
- _TS = r'(?P<timestamp>[a-zA-Z0-9: ]+[a-zA-Z0-9])\s*'
-
- # subexpression parts
- _NICK = r'(?P<nickname>.+?)'
- _NICK2 = r'(?P<nickname2>.+?)'
- _TARGET = r'(?P<target>.+?)'
- _CHAN = r'(?P<channel>.+?)'
- _CHAN2 = r'(?P<channel2>.+?)'
- _USERHOST = r'(?P<username>.*?)@(?P<hostname>.*?)'
- _MSG = r'(?P<message>.*)'
- _SRV1 = r'(?P<server1>.+?)'
- _SRV2 = r'(?P<server2>.+?)'
-
- # regular expressions for matching lines, by type
- TYPE_EXPRS = (
- ( LogTypes.LOG_OPEN, r'--- Log opened (?P<datetime>.+)' ),
- ( LogTypes.LOG_CLOSE, r'--- Log closed (?P<datetime>.+)' ),
- ( LogTypes.MSG, _TS + r'<(?P<flags>.)' + _NICK + '> ' + _MSG ),
- ( LogTypes.NOTICE, _TS + r'-' + _NICK + ':' + _CHAN + '- ' + _MSG ),
- ( LogTypes.ACTION, _TS + r'\* ' + _NICK + ' ' + _MSG ),
- ( LogTypes.JOIN, _TS + r'-!- ' + _NICK + ' \[' + _USERHOST + '\] has joined ' + _CHAN ),
- ( LogTypes.PART, _TS + r'-!- ' + _NICK + ' \[' + _USERHOST + '\] has left ' + _CHAN + ' \[(?P<message>.*?)\]' ),
- ( LogTypes.KICK, _TS + r'-!- ' + _TARGET + ' was kicked from ' + _CHAN + ' by ' + _NICK + ' \[(?P<message>.*?)\]' ),
- # XXX: use hostname instead of nickname for ServerMode
- ( LogTypes.MODE, _TS + r'-!- (mode|ServerMode)/' + _CHAN + ' \[(?P<mode>.+?)\] by (?P<nickname>\S+)' ),
- ( LogTypes.NICK, _TS + r'-!- ' + _NICK + ' is now known as (?P<target>\S+)' ),
- ( LogTypes.QUIT, _TS + r'-!- ' + _NICK + ' \[' + _USERHOST + '\] has quit \[(?P<message>.*?)\]' ),
- ( LogTypes.TOPIC, _TS + r'-!- (' + _NICK + ' changed the topic of ' + _CHAN + ' to: (?P<topic>.*)|Topic unset by ' + _NICK2 + ' on ' + _CHAN2 + ')' ),
-
- ( LogTypes.SELF_NOTICE, _TS + r'\[notice\(' + _CHAN + '\)\] ' + _MSG ),
- ( LogTypes.SELF_NICK, _TS + r'-!- You\'re now known as (?P<target>\S+)' ),
-
- ( LogTypes.NETSPLIT_START, _TS + r'-!- Netsplit ' + _SRV1 + ' <-> ' + _SRV2 + ' quits: (?P<nick_list>[^(]+)( \(\+(?P<count>\d+) more,\S+\))?'),
- ( LogTypes.NETSPLIT_END, _TS + r'-!- Netsplit over, joins: (?P<nick_list>[^(]+)( \(\+(?P<count>\d+) more\))?' ),
-
- ( 'DAY_CHANGED', r'--- Day changed (?P<date>.+)' ),
- )
-
- # precompile
- TYPE_REGEXES = [(type, re.compile(expr)) for type, expr in TYPE_EXPRS]
-
- def parse_line (self, channel, line, date, offset=None) :
- """
- Parse a single line, and return the resulting LogLine, or None, to ignore the line.
-
- Uses self.TYPE_REGEXES to do the matching
- """
-
- # empty line
- if not line :
- return
-
- # look for match
- match = type = None
-
- # test each type
- for type, regex in self.TYPE_REGEXES :
- # attempt to match
- match = regex.match(line)
-
- # found, break
- if match :
- break
-
- # no match found?
- if not match :
- raise LogParseError(line, offset, "Line did not match any type")
-
- # match groups
- groups = match.groupdict(None)
-
- # parse timestamp
- if 'datetime' in groups :
- # parse datetime using default asctime() format
- dt = datetime.datetime.strptime(groups['datetime'], '%a %b %d %H:%M:%S %Y')
-
- elif 'timestamp' in groups :
- # parse timestamp into naive datetime
- dt = datetime.datetime.strptime(groups['timestamp'], self.timestamp_fmt)
-
- # override date?
- if date :
- dt = dt.replace(year=date.year, month=date.month, day=date.day)
-
- elif 'date' in groups :
- # parse date-only datetime
- dt = datetime.datetime.strptime(groups['date'], '%a %b %d %Y')
-
- else :
- # no timestamp !?
- raise LogParseError(line, offset, "No timestamp")
-
- # now localize with timezone
- dtz = self.tz.localize(dt)
-
- # channel, currently unused
- channel_name = (groups.get('channel') or groups.get('channel2'))
-
- # source
- if 'server1' in groups :
- source = (None, None, groups.get('server1'), None)
-
- else :
- source = (groups.get('nickname') or groups.get('nickname2'), groups.get('username'), groups.get('hostname'), groups.get('flags'))
-
- # target
- if 'server2' in groups :
- target = groups.get('server2')
-
- else :
- target = groups.get('target')
-
- # data
- if 'message' in groups :
- data = groups['message']
-
- elif 'mode' in groups :
- data = groups['mode']
-
- elif 'topic' in groups :
- data = groups['topic']
-
- elif 'nick_list' in groups :
- # split into components
- list = groups['nick_list'].split(', ')
-
- # additional count?
- if 'count' in groups and groups['count'] :
- list.append('+%d' % int(groups['count']))
-
- # join
- data = ' '.join(list)
-
- else :
- data = None
-
- # custom types?
- if type == 'DAY_CHANGED' :
- # new date
- date = dtz
-
- else :
- # build+return (date, LogLine)
- return date, LogLine(channel, offset, type, dtz, source, target, data)
-
- def parse_lines (self, channel, lines, date=None, starting_offset=None) :
- """
- Parse the given lines, yielding LogEvents.
- """
-
- for offset, line in enumerate(lines) :
- # offset?
- if starting_offset :
- offset = starting_offset + offset
-
- else :
- offset = None
-
- # try and parse
- try :
- # get None or (date, line)
- line_info = self.parse_line(channel, line, date, offset)
-
- # passthrough LogParseError's
- except LogParseError :
- raise
-
- # wrap other errors as LogParseError
- except Exception, e :
- raise LogParseError(line, offset, "Parsing line failed: %s" % e)
-
- else :
- # nothing?
- if not line_info :
- continue
-
- # unpack, update date
- date, line = line_info
-
- # yield
- yield line
-
-
--- a/log_search.py Sun Sep 13 00:49:55 2009 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,367 +0,0 @@
-"""
- Full-text searching of logs
-"""
-
-import datetime, calendar, pytz
-import os.path
-
-import HyperEstraier as hype
-
-import log_line, utils, config
-
-class LogSearchError (Exception) :
- """
- General search error
- """
-
- pass
-
-class SearchIndexError (LogSearchError) :
- """
- Error manipulating the index
- """
-
- def __init__ (self, msg, db) :
- """
- Build the error from the given message + HyperEstraier.Database
- """
-
- super(SearchIndexError, self).__init__("%s: %s" % (msg, db.err_msg(db.error())))
-
-class NoResultsFound (LogSearchError) :
- """
- No results found
- """
-
- pass
-
-class LogSearchIndex (object) :
- """
- An index on the logs for a group of channels.
-
- This uses Hyper Estraier to handle searching, whereby each log line is a document (yes, I have a powerful server).
-
- These log documents have the following attributes:
- @uri - channel/date/line
- channel - channel code
- type - the LogType id
- timestamp - UTC timestamp
- source_nickname - source nickname
- source_username - source username
- source_hostname - source hostname
- source_chanflags - source channel flags
- target_nickname - target nickname
-
- Each document then has a single line of data, which is the log data message
- """
-
- def __init__ (self, channels, path, mode='r') :
- """
- Open the database at the given path, with the given mode:
- First char:
- r - read, error if not exists
- w - write, create if not exists
- a - write, error if not exists
- c - create, error if exists
-
- Additional chars:
- trunc - truncate if exists
- + - read as well as write
- ? - non-blocking lock open, i.e. it fails if already open
-
- Channels is the ChannelList.
- """
-
- # store
- self.channels = channels
- self.path = path
- self.mode = mode
-
- # check it does not already exist?
- if mode in 'c' and os.path.exists(path) :
- raise LogSearchError("Index already exists: %s" % (path, ))
-
- # mapping of { mode -> flags }
- mode_to_flag = {
- 'r': hype.Database.DBREADER,
- 'w': hype.Database.DBWRITER | hype.Database.DBCREAT,
- 'a': hype.Database.DBWRITER,
- 'c': hype.Database.DBWRITER | hype.Database.DBCREAT,
- }
-
- # flags to use, standard modes
- flags = mode_to_flag[mode[0]]
-
- # mode-flags
- if '?' in mode :
- # non-blocking locking
- flags |= hype.Database.DBLCKNB
-
- elif '+' in mode :
- # read
- flags |= hype.Database.DBREADER
-
- elif 'trunc' in mode :
- # truncate. Dangerous!
- flags |= hype.Database.DBTRUNC
-
- # make instance
- self.db = hype.Database()
-
- # open
- if not self.db.open(path, flags) :
- raise SearchIndexError("Index open failed: %s, mode=%s, flags=%#06x" % (path, mode, flags), self.db)
-
- def close (self) :
- """
- Explicitly close the index, this is done automatically on del
- """
-
- if not self.db.close() :
- raise SearchIndexError("Index close failed", self.db)
-
- def insert (self, channel, lines) :
- """
- Adds a sequence of LogLines from the given LogChannel to the index, and return the number of added items
- """
-
- # count from zero
- count = 0
-
- # iterate
- for line in lines :
- # insert
- self.insert_line(channel, line)
-
- # count
- count += 1
-
- # return
- return count
-
- def insert_line (self, channel, line) :
- """
- Adds a single LogLine for the given LogChannel to the index
- """
-
- # validate the LogChannel
- assert channel.id
-
- # validate the LogLine
- assert line.offset
- assert line.timestamp
-
- # create new document
- doc = hype.Document()
-
- # line date
- date = line.timestamp.date()
-
- # ensure that it's not 1900
- assert date.year != 1900
-
- # add URI
- doc.add_attr('@uri', "%s/%s/%d" % (channel.id, date.strftime('%Y-%m-%d'), line.offset))
-
- # add channel id
- doc.add_attr('channel', channel.id)
-
- # add type
- doc.add_attr('type', str(line.type))
-
- # add UTC timestamp
- doc.add_attr('timestamp', str(utils.to_utc_timestamp(line.timestamp)))
-
- # add source attribute?
- if line.source :
- source_nickname, source_username, source_hostname, source_chanflags = line.source
-
- if source_nickname :
- doc.add_attr('source_nickname', source_nickname.encode('utf8'))
-
- if source_username :
- doc.add_attr('source_username', source_username.encode('utf8'))
-
- if source_hostname :
- doc.add_attr('source_hostname', source_hostname.encode('utf8'))
-
- if source_chanflags :
- doc.add_attr('source_chanflags', source_chanflags.encode('utf8'))
-
- # add target attributes?
- if line.target :
- target_nickname = line.target
-
- if target_nickname :
- doc.add_attr('target_nickname', target_nickname.encode('utf8'))
-
- # add data
- if line.data :
- doc.add_text(line.data.encode('utf8'))
-
- # put, "clean up dispensable regions of the overwritten document"
- if not self.db.put_doc(doc, hype.Database.PDCLEAN) :
- raise SearchIndexError("put_doc", self.db)
-
- def search_cond (self, cond) :
- """
- Search using a raw hype.Condition. Raises NoResultsFound if there aren't any results
- """
-
- # execute search, unused 'flags' arg stays zero
- results = self.db.search(cond, 0)
-
- # no results?
- if not results :
- raise NoResultsFound()
-
- # iterate over the document IDs
- for doc_id in results :
- # load document, this throws an exception...
- # option constants are hype.Database.GDNOATTR/GDNOTEXT
- doc = self.db.get_doc(doc_id, 0)
-
- # load the attributes/text
- channel = self.channels.lookup(doc.attr('channel'))
- type = int(doc.attr('type'))
- timestamp = utils.from_utc_timestamp(int(doc.attr('timestamp')))
-
- # source
- source = (doc.attr('source_nickname'), doc.attr('source_username'), doc.attr('source_hostname'), doc.attr('source_chanflags'))
-
- # target
- target = doc.attr('target_nickname')
-
- # message text
- message = doc.cat_texts().decode('utf8')
-
- # build+yield to as LogLine
- yield log_line.LogLine(channel, None, type, timestamp, source, target, message)
-
- def search (self, options=None, channel=None, attrs=None, phrase=None, order=None, max=None, skip=None) :
- """
- Search with flexible parameters
-
- options - bitmask of hype.Condition.*
- channel - LogChannel object
- attrs - raw attribute expressions
- phrase - the search query phrase
- order - order attribute expression
- max - number of results to return
- skip - number of results to skip
- """
-
- # build condition
- cond = hype.Condition()
-
- if options :
- # set options
- cond.set_options(options)
-
- if channel :
- # add channel attribute
- cond.add_attr(("channel STREQ %s" % channel.id).encode('utf8'))
-
- if attrs :
- # add attributes
- for attr in attrs :
- cond.add_attr(attr.encode('utf8'))
-
- if phrase :
- # add phrase
- cond.set_phrase(phrase.encode('utf8'))
-
- if order :
- # set order
- cond.set_order(order)
-
- if max :
- # set max
- cond.set_max(max)
-
- if skip :
- # set skip
- cond.set_skip(skip)
-
- # execute
- return self.search_cond(cond)
-
- def search_simple (self, channel, query, count=None, offset=None, search_msg=True, search_nick=False) :
- """
- Search for lines from the given channel for the given simple query.
-
- The search_* params define which attributes to search for (using fulltext search for the message, STROR for
- attributes).
- """
-
- # search attributes
- attrs = []
-
- # nickname target query
- if search_nick :
- attrs.append("source_nickname STRINC %s" % query)
-# attrs.append("target_nickname STRINC %s" % query)
-
- # use search(), backwards
- results = list(self.search(
- # simplified phrase
- options = hype.Condition.SIMPLE,
-
- # specific channel
- channel = channel,
-
- # given phrase
- phrase = query if search_msg else None,
-
- # attributes defined above
- attrs = attrs,
-
- # order by timestamp, descending (backwards)
- order = "timestamp NUMD",
-
- # count/offset
- max = count,
- skip = offset,
- ))
-
- # reverse
- return reversed(results)
-
- def list (self, channel, date, count=None, skip=None) :
- """
- List all indexed log items for the given UTC date
- """
-
- # start/end dates
- dt_start = datetime.datetime(date.year, date.month, date.day, 0, 0, 0, 0)
- dt_end = datetime.datetime(date.year, date.month, date.day, 23, 23, 59, 999999)
-
- # search
- return self.search(
- # specific channel
- channel = channel,
-
- # specific date range
- attrs = [
- "timestamp NUMBT %d %d" % (utils.to_utc_timestamp(dt_start), utils.to_utc_timestamp(dt_end))
- ],
-
- # order correctly
- order = "timestamp NUMA",
-
- # max count/offset
- max = count,
- skip = skip
- )
-
-def get_index () :
- """
- Returns the default read-only index, suitable for searching
- """
-
- # XXX: no caching, just open it every time
- _index = LogSearchIndex(config.LOG_CHANNELS, config.SEARCH_INDEX_PATH, 'r')
-
- # return
- return _index
-
--- a/log_source.py Sun Sep 13 00:49:55 2009 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,679 +0,0 @@
-"""
- A source of IRC log files
-"""
-
-import datetime, calendar, itertools, functools, math
-import os, os.path, errno
-import pytz
-
-import config, utils
-
-# a timedelta that represents one day
-ONE_DAY = datetime.timedelta(days=1)
-
-class LogSourceDecoder (object) :
- """
- Handles decoding of LogSource lines
- """
-
- def __init__ (self, encoding_list) :
- """
- Will try each of the given (charset, errors) items in turn, until one succeeds
- """
-
- self.encoding_list = encoding_list
-
- def decode (self, line) :
- """
- Decode the line of str() text into an unicode object
- """
-
- # list of errors encountered
- error_list = []
-
- # try each in turn
- for charset, errors in self.encoding_list :
- # trap UnicodeDecodeError to try with the next one
- try :
- return line.decode(charset, errors)
-
- except UnicodeDecodeError, e :
- error_list.append("%s:%s - %s" % (charset, errors, e))
- continue
-
- # failure
- raise UnicodeDecodeError("Failed to decode line: %r: %s" % (line, ', '.join(error_list)))
-
-class LogSource (object) :
- """
- A collection of IRC logs for a specific target in some format. Provides the possibility to read specific events
- """
-
- def __init__ (self, decoder, channel=None) :
- """
- The appropriate LogChannel must be given, as we need to be able to construct the LogLines. If it is not yet
- known, then it can be given as None, and set later with bind_channel.
-
- Uses the given LogSourceDecoder to decode the lines.
- """
-
- self.channel = channel
- self.decoder = decoder
-
- def bind_channel (self, channel) :
- """
- Set this source's channel, where None was set before
- """
-
- assert not self.channel
-
- self.channel = channel
-
- def get_latest (self, count) :
- """
- Yield the latest events, up to `count` of them.
- """
-
- abstract
-
- def get_date (self, dt) :
- """
- Get logs for the given date (as a datetime).
- """
-
- abstract
-
- def get_date_paged (self, dt, count, page=None) :
- """
- Get the logs for a given date (as a datetime), divided into pages of count each. If page is given, the time
- portion of the dt is ignored, and the lines for the given page are returned. Otherwise, if page is None,
- then the lines for the page containing the given timestamp is returned.
-
- The return value is a (page, max, lines) tuple.
- """
-
- # how to act?
- if page :
- # constant skip
- skip = (page - 1) * count
-
- else :
- skip = None
-
- # go through the logs a page at a time
- this_page = 1
-
- # last line's timestamp
- last_ts = None
-
- # found it yet?
- found = False
-
- # count the full number of lines
- line_count = 0
-
- # collect lines
- lines = []
-
- # iterate using get_date
- for line in self.get_date(dt) :
- # count them
- line_count += 1
-
- # skip?
- if skip :
- skip -= 1
- continue
-
- # is this page all that we want/need?
- if page or found :
- # already full?
- if len(lines) >= count :
- continue
-
- # specfic timestamp
- else :
- # didn't find it in this page?
- if len(lines) >= count :
- # reset to next page
- lines = []
- this_page += 1
-
- # is dt between these two timestamps?
- if (not last_ts or last_ts <= dt) and (dt <= line.timestamp) :
- # found!
- found = True
- page = this_page
-
- else :
- # keep looking
- last_ts = line.timestamp
-
- # store line
- lines.append(line)
-
- # calculate max_pages
- max_pages = math.ceil(float(line_count) / count)
-
- # return
- return (page, max_pages, lines)
-
- def get_month_days (self, dt) :
- """
- Return an ordered sequence of dates, telling which days in the given month (as a datetime) have logs available.
- """
-
- abstract
-
- def get_modified (self, dt=None, after=None, until=None) :
- """
- Returns a sequence of LogLines that may have been *modified* from their old values since the given datetime.
-
- If the datetime is not given, *all* lines are returned.
-
- If after is given, only lines from said date onwards will be returned, regardless of modification.
- If until is given, only lines up to and including said date will be returned, regardless of modification.
-
- The LogLines should be in time order.
- """
-
- abstract
-
- def get_prev_date (self, dt) :
- """
- Get the next distinct date of logs available preceeding the given date, or None
- """
-
- abstract
-
- def get_next_date (self, dt) :
- """
- Get the next distinct date of logs following the given date, or None.
- """
-
- abstract
-
-class LogFile (object) :
- """
- A file containing LogEvents
-
- XXX: modify to implement LogSource?
- """
-
- def __init__ (self, path, parser, decoder, channel=None, start_date=None, sep='\n') :
- """
- Open the file at the given path, which contains lines as separated by the given separator. Lines are
- decoded using the given LogSourceDecoder, and then parsed using the given parser, using the given date
- as the initial date for this log's first line.
-
- XXX: currently we assume start_date also for the end of the file
- """
-
- # store
- self.channel = channel
- self.path = path
- self.parser = parser
- self.start_date = start_date
- self.decoder = decoder
- self.sep = sep
-
- # open
- self.file = open(path, 'rb')
-
- def __iter__ (self) :
- """
- Yields a series of unicode lines, as read from the top of the file
- """
-
- # seek to beginning
- self.file.seek(0)
-
- # iterate over lines, decoding them as well
- return (self.decoder.decode(line.rstrip(self.sep)) for line in self.file)
-
- def read_full (self) :
- """
- Reads all LogLines. The LogLines will have a valid offset.
- """
-
- # just use our __iter__
- return self.parser.parse_lines(self.channel, self, self.start_date, starting_offset=1)
-
- def read_from (self, dt) :
- """
- Reads all LogLines from the given naive timestamp onwards
- """
-
- # start reading at beginning
- events = self.read_full()
-
- # skip unwanted events
- for event in events :
- if event.timestamp < dt :
- continue
-
- else :
- # include this line as well
- yield event
- break
-
- # yield the rest as-is
- for event in events :
- yield event
-
- def read_until (self, dt) :
- """
- Reads all LogLines up until the given naive timestamp
- """
-
- # start reading events at the beginning
- events = self.read_full()
-
- # yield events until we hit the given timestamp
- for event in events :
- if event.timestamp <= dt :
- yield event
-
- else :
- break
-
- # ignore the rest
- return
-
- def _read_blocks_reverse (self, blocksize=1024) :
- """
- Yields blocks of file data in reverse order, starting at the end of the file
- """
-
- # seek to end of file
- self.file.seek(0, os.SEEK_END)
-
- # read offset
- # XXX: hack -1 to get rid of trailing newline
- size = offset = self.file.tell() - 1
-
- # do not try to read past the beginning of the file
- while offset > 0:
- # calc new offset + size
- if offset > blocksize :
- # full block
- offset -= blocksize
- read_size = blocksize
-
- else :
- # partial block
- read_size = offset
- offset = 0
-
- # seek to offset
- self.file.seek(offset)
-
- # read the data we want
- block = self.file.read(read_size)
-
- # sanity check
- assert len(block) == read_size
-
- # yield
- yield block
-
- def _read_lines_reverse (self) :
- """
- Yields decoded lines from the end of the file, in reverse order.
- """
-
- # partial lines
- buf = ''
-
- # read from end of file, a block at a time
- for block in self._read_blocks_reverse() :
- # add in our previous buf
- buf = block + buf
-
- # split up lines
- lines = buf.split(self.sep)
-
- # keep the first one as our buffer, as it's incomplete
- buf = lines[0]
-
- # yield the rest a line at a time in reverse order... this looks weird, but that's how slicing works :)
- # XXX: use something like islice, this has to build a slice object
- for line in lines[:0:-1] :
- yield self.decoder.decode(line)
-
- def read_latest (self, count) :
- """
- Returns up to count events, from the end of the file, or less, if the file doesn't contain that many lines.
- """
-
- # the list of lines
- lines = []
-
- # start reading lines into lines
- for line in self._read_lines_reverse() :
- # append
- lines.append(line)
-
- # done?
- if len(lines) >= count :
- break
-
- # decode in reverse order, using our starting date....
- # XXX: use lines[::-1] or reversed?
- # XXX: it may make more sense to parse in reverse order, using 'self.end_date' or something like that
- return self.parser.parse_lines(self.channel, reversed(lines), self.start_date)
-
-class LogDirectory (LogSource) :
- """
- A directory containing a series of timestamped LogFiles
- """
-
- def __init__ (self, path, tz, parser, decoder, filename_fmt, channel=None) :
- """
- Load the logfiles at the given path, which are for the given LogChannel
-
- Decode the file lines using the given decoder, the files are named according the the date in the given
- timezone and date format, and will be parsed using the given parser.
- """
-
- # store
- self.channel = channel
- self.path = path
- self.tz = tz
- self.parser = parser
- self.decoder = decoder
- self.filename_fmt = filename_fmt
-
- def _get_logfile_date (self, d, load=True, mtime=False, ignore_missing=False) :
- """
- Get the logfile corresponding to the given naive date in our timezone.
-
- If load is False, only test for the presence of the logfile, do not actually open it. If mtime is given,
- then this returns the file's mtime
-
- Returns None if the logfile does not exist, unless ignore_missing is given as False.
- """
-
- # format filename
- filename = d.strftime(self.filename_fmt)
-
- # build path
- path = os.path.join(self.path, filename)
-
- try :
- if load :
- # open+return the LogFile
- return LogFile(path, self.parser, self.decoder, start_date=d, channel=self.channel)
-
- elif mtime :
- # stat
- return utils.mtime(path)
-
- else :
- # test
- return os.path.exists(path)
-
- # XXX: move to LogFile
- except IOError, e :
- # return None for missing files
- if e.errno == errno.ENOENT and ignore_missing :
- return None
-
- else :
- raise
-
- def _iter_logfile_dates (self, after=None, until=None, reverse=False) :
- """
- Yields a series of naive datetime objects representing the logfiles that are available, in time order.
-
- Parameters :
- after only dates from said date onwards will be returned
- until only dates up to and including said date will be returned
- reverse the dates are returned in reverse order instead. Note that the meaning of after/until doesn't change
- """
-
- # convert timestamps to our timezone's dates
- if after :
- after = after.astimezone(self.tz).date()
-
- if until :
- until = until.astimezone(self.tz).date()
-
- # listdir
- filenames = os.listdir(self.path)
-
- # sort
- filenames.sort(reverse=reverse)
-
- # iter files
- for filename in filenames :
- try :
- # parse date
- dt = self.tz.localize(datetime.datetime.strptime(filename, self.filename_fmt))
- date = dt.date()
-
- except :
- # ignore
- continue
-
- else :
- if (after and date < after) or (until and date > until) :
- # ignore
- continue
-
- else :
- # yield
- yield dt
-
- def _iter_date_reverse (self, dt=None) :
- """
- Yields an infinite series of naive date objects in our timezone, iterating backwards in time starting at the
- given *datetime*, or the the current date, if none given
- """
-
- # default to now
- if not dt :
- dtz = self.tz.localize(datetime.datetime.now())
-
- else :
- # convert to target timezone
- dtz = dt.astimezone(self.tz)
-
- # iterate unto infinity
- while True :
- # yield
- yield dtz.date()
-
- # one day sdrawkcab
- dtz -= ONE_DAY
-
- def _iter_logfile_reverse (self, dt=None, max_files=100) :
- """
- Yields a series of LogFile objects, iterating backwards in time starting at the given datetime, or the
- current date, if none given.
-
- Reads/probes at most max_files files.
- """
-
- # start counting at zero...
- file_count = 0
-
- # have we found any files at all so far?
- have_found = False
-
- # iterate backwards over days
- for day in self._iter_date_reverse(dt) :
- # stop if we've handled enough files by now
- if file_count > max_files :
- break
-
- # try and open the next logfile
- logfile = None
-
- file_count += 1
- logfile = self._get_logfile_date(day, ignore_missing=True)
-
- # no logfile there?
- if not logfile :
- # hit our limit?
- if file_count > max_files :
- # if we didn't find any logfiles at all, terminate rudely
- if not have_found :
- raise Exception("No recent logfiles found")
-
- else :
- # stop looking, deal with what we've got
- return
-
- else :
- # skip to next day
- continue
-
- # mark have_found
- have_found = True
-
- # yield it
- yield logfile
-
- def get_latest (self, count) :
- """
- Uses _logfile_reverse to read the yield the given lines from as many logfiles as needed
- """
-
- # read the events into here
- lines = []
-
- # start reading in those logfiles
- for logfile in self._iter_logfile_reverse() :
- # read the events
- # XXX: use a queue
- lines = list(logfile.read_latest(count)) + lines
-
- # done?
- if len(lines) >= count :
- break
-
- # return the events
- return lines
-
- def get_date (self, dt) :
- """
- A 'day' is considered to be a 24-hour period from 00:00:00 23:59:59. If the timezone of the given datetime
- differs from our native datetime, this may involve lines from more than one logfile.
- """
-
- # begin/end of 24h period, in target timezone
- dtz_begin = dt.replace(hour=0, minute=0, second=0).astimezone(self.tz)
- dtz_end = dt.replace(hour=23, minute=59, second=59, microsecond=999999).astimezone(self.tz)
-
- # as dates
- d_begin = dtz_begin.date()
- d_end = dtz_end.date()
-
-# print
-# print "LogDirectory.get_date - %s" % dt
-# print "\t %s %s" % (d_begin, dtz_begin)
-# print "\t-> %s %s" % (d_end, dtz_end)
-
- # if they're the same, just pull the full log for that date
- if d_begin == d_end :
- # open that log
- logfile = self._get_logfile_date(d_begin)
-
- # return the full data
- return logfile.read_full()
-
- # otherwise, we need to pull two partial logs
- else :
- # open both of them, but it's okay if we don't have the second one
- f_begin = self._get_logfile_date(d_begin)
- f_end = self._get_logfile_date(d_end, ignore_missing=True)
-
- # chain together the two sources
- return itertools.chain(
- f_begin.read_from(dtz_begin),
- f_end.read_until(dtz_end) if f_end else []
- )
-
- def _iter_month_days (self, month) :
- """
- Iterates over the days of a month as dt objects with time=0
- """
-
- # there's at most 31 days in a month...
- for day in xrange(1, 32) :
- try :
- # try and build the datetime
- dt = datetime.datetime(month.year, month.month, day)
-
- except :
- # stop
- return
-
- else :
- # fix timezones + yield
- yield month.tzinfo.localize(dt)
-
- def get_month_days (self, month) :
- """
- Returns a set of dates for which logfiles are available in the given datetime's month
- """
-
- # iterate over month's days
- for dt in self._iter_month_days(month) :
- # date in our target timezone
- log_date = dt.astimezone(self.tz).date()
-
- # test for it
- if self._get_logfile_date(log_date, load=False, ignore_missing=True) :
- # valid
- yield dt.date()
-
- def get_modified (self, dt=None, after=None, until=None) :
- """
- Returns the contents off all logfiles with mtimes past the given date
- """
-
- # iterate through all available logfiles in date order, as datetimes, from the given date on
- for log_date in self._iter_logfile_dates(after, until) :
- # compare against dt?
- if dt :
- # stat
- mtime = self._get_logfile_date(log_date, load=False, mtime=True, ignore_missing=True)
-
- # not modified?
- if mtime < dt :
- # skip
- continue
-
- # open
- logfile = self._get_logfile_date(log_date)
-
- # yield all lines
- for line in logfile.read_full() :
- yield line
-
- def get_prev_date (self, dt) :
- """
- Just use _iter_logfile_dates
- """
-
- # use for to "iter" once
- for log_date in self._iter_logfile_dates(until=dt - ONE_DAY, reverse=True) :
- return log_date
-
- else :
- return None
-
- def get_next_date (self, dt) :
- """
- Just use _iter_logfile_dates
- """
-
- # use for to "iter" once
- for log_date in self._iter_logfile_dates(after=dt + ONE_DAY) :
- return log_date
-
- else :
- return None
-
--- a/preferences.py Sun Sep 13 00:49:55 2009 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,534 +0,0 @@
-"""
- Handling user preferences
-"""
-
-import functools
-import Cookie
-
-from qmsk.web import urltree
-import utils
-
-class Preference (urltree.URLType) :
- """
- A specific preference
- """
-
- # the name to use
- name = None
-
- # the default value, as from parse()
- default = None
-
- def is_default (self, value) :
- """
- Returns True if the given post-value is the default value for this preference.
-
- Defaults to just compare against self.default
- """
-
- return (value == self.default)
-
- def process (self, preferences, value) :
- """
- Post-process this preference value. This can access the post-processed values of all other preferences that
- were defined before this one in the list given to Preferences.
-
- Defaults to just return value.
- """
-
- return value
-
-class RequestPreferences (object) :
- """
- Represents the specific preferences for some request
- """
-
- def __init__ (self, preferences, request, value_map=None) :
- """
- Initialize with the given Preferences object, http Request, and { key: value } mapping of raw preference values.
-
- This will build a mapping of { name: pre-value } using Preference.parse/Preference.default, and then
- post-process them into the final { name: value } mapping using Preference.process, in strict pref_list
- order. Note that the process() method will only have access to those preferences processed before it was.
- """
-
- # store
- self.preferences = preferences
- self.request = request
-
- # initialize
- self.values = {}
- self.set_cookies = {}
-
- # initial value map
- pre_values = {}
-
- # load preferences
- for pref in preferences.pref_list :
- # got a value for it?
- if value_map and pref.name in value_map :
- # get value
- value = value_map[pref.name]
-
- # parse it
- value = pref.parse(value)
-
- else :
- # use default value
- value = pref.default
-
- # add
- pre_values[pref.name] = value
-
- # then post-process using Preferences.process(), in strict pref_list order
- for pref in preferences.pref_list :
- # store into self.values, so that pref.get(...) will be able to access the still-incomplete self.values
- # dict
- self.values[pref.name] = pref.process(self, pre_values[pref.name])
-
- def _get_name (self, pref) :
- """
- Look up a Preference's name, either by class, object or name.
- """
-
- # Preference -> name
- if isinstance(pref, Preference) :
- pref = pref.name
-
- return pref
-
- def pref (self, name) :
- """
- Look up a Preference by object, name
- """
-
- # Preference
- if isinstance(name, Preference) :
- return name
-
- # Preference.name
- elif isinstance(name, basestring) :
- return self.preferences.pref_map[name]
-
- # XXX: class?
- else :
- assert False
-
- def get (self, pref) :
- """
- Return the value for the given Preference, or preference name
- """
-
- # look up
- return self.values[self._get_name(pref)]
-
- # support dict-access
- __getitem__ = get
-
- def is_default (self, pref) :
- """
- Returns True if the given preference is at its default value
- """
-
- # determine using Preference.is_default
- return self.pref(pref).is_default(self.get(pref))
-
- def build (self, pref) :
- """
- Like 'get', but return the raw cookie value
- """
-
- # the Preference
- pref = self.pref(pref)
-
- # build
- return pref.build(self.get(pref))
-
- def parse (self, pref, value=None) :
- """
- Parse+process the raw value for some pref into a value object.
-
- Is the given raw value is None, this uses Preference.default
- """
-
- # lookup pref
- pref = self.pref(pref)
-
- # build value
- if value is not None :
- # parse
- value = pref.parse(value)
-
- else :
- # default
- value = pref.default
-
- # post-process
- value = pref.process(self, value)
-
- # return
- return value
-
- def set (self, name, value_obj=None) :
- """
- Set a new value for the given preference (by str name).
-
- If value_obj is None, then the preference cookie is unset
- """
-
- # sanity-check to make sure we're not setting it twice...
- assert name not in self.set_cookies
-
- # None?
- if value_obj is not None :
- # encode using the Preference object
- value_str = self.preferences.pref_map[name].build(value_obj)
-
- else :
- # unset as None
- value_str = None
-
- # update in our dict
- self.values[name] = value_obj
-
- # add to set_cookies
- self.set_cookies[name] = value_str
-
-class Preferences (object) :
- """
- Handle user preferences using cookies
- """
-
- def __init__ (self, pref_list) :
- """
- Use the given list of Preference objects.
-
- The ordering of the given pref_list is significant for the process() implementation, as the
- Preferences are process()'d in order.
- """
-
- # store
- self.pref_list = pref_list
-
- # translate to mapping as well
- self.pref_map = dict((pref.name, pref) for pref in pref_list)
-
- def load (self, request, ) :
- """
- Load the set of preferences for the given request, and return as a { name -> value } dict
- """
-
- # the dict of values
- values = {}
-
- # load the cookies
- cookie_data = request.env.get('HTTP_COOKIE')
-
- # got any?
- if cookie_data :
- # parse into a SimpleCookie
- cookies = Cookie.SimpleCookie(cookie_data)
-
- # update the the values
- values.update((morsel.key, morsel.value) for morsel in cookies.itervalues())
-
- else :
- cookies = None
-
- # apply any query parameters
- for pref in self.pref_list :
- # look for a query param
- value = request.get_arg(pref.name)
-
- if value :
- # override
- values[pref.name] = value
-
- # build the RequestPreferences object
- return cookies, RequestPreferences(self, request, values)
-
- def handler (self, *pref_list) :
- """
- Intended to be used as a decorator for a request handler, this will load the give Preferences and pass
- them to the wrapped handler as keyword arguments, in addition to any others given.
- """
-
- def _decorator (func) :
- @functools.wraps(func)
- def _handler (request, **args) :
- # load preferences
- cookies, prefs = self.load(request)
-
- # bind to request.prefs
- # XXX: better way to do this? :/
- request.prefs = prefs
-
- # update args with new ones
- args.update(((pref.name, prefs.get(pref)) for pref in pref_list))
-
- # handle to get response
- response = func(request, **args)
-
- # set cookies?
- if prefs.set_cookies :
- # default, empty, cookiejar
- if not cookies :
- cookies = Cookie.SimpleCookie('')
-
- # update cookies
- for key, value in prefs.set_cookies.iteritems() :
- if value is None :
- assert False, "Not implemented yet..."
-
- else :
- # set
- cookies[key] = value
- cookies[key]["path"] = config.PREF_COOKIE_PATH
- cookies[key]["expires"] = config.PREF_COOKIE_EXPIRE_SECONDS
-
- # add headers
- for morsel in cookies.itervalues() :
- response.add_header('Set-cookie', morsel.OutputString())
-
- return response
-
- # return wrapped handler
- return _handler
-
- # return decorator...
- return _decorator
-
-# now for our defined preferences....
-import pytz
-import config
-
-class TimeFormat (urltree.URLStringType, Preference) :
- """
- Time format
- """
-
- # set name
- name = 'time_format'
-
- # default value
- default = config.PREF_TIME_FMT_DEFAULT
-
-class DateFormat (urltree.URLStringType, Preference) :
- """
- Date format
- """
-
- # set name
- name = 'date_format'
-
- # default value
- default = config.PREF_DATE_FMT_DEFAULT
-
-class TimezoneOffset (Preference) :
- """
- If the DST-aware 'timezone' is missing, we can fallback to a fixed-offset timezone as detected by
- Javascript.
-
- This is read-only, and None by default
- """
-
- name = 'timezone_offset'
- default = None
-
- def parse (self, offset) :
- """
- Offset in minutes -> said minutes
- """
-
- return int(offset)
-
-class Timezone (Preference) :
- """
- Timezone
- """
-
- # set name
- name = 'timezone'
-
- # default is handled via process()
- default = 'auto'
-
- # the list of available (value, name) options for use with helpers.select_options
- OPTIONS = [('auto', "Autodetect")] + [(None, tz_name) for tz_name in pytz.common_timezones]
-
- def parse (self, name) :
- """
- default -> default
- tz_name -> pytz.timezone
- """
-
- # special-case for 'auto'
- if name == self.default :
- return self.default
-
- else :
- return pytz.timezone(name)
-
- def is_default (self, tz) :
- """
- True if it's a FixedOffsetTimezone or PREF_TIMEZONE_FALLBACK
- """
-
- return (isinstance(tz, utils.FixedOffsetTimezone) or tz == config.PREF_TIMEZONE_FALLBACK)
-
- def build (self, tz) :
- """
- FixedOffsetTimezone -> None
- pytz.timezone -> tz_name
- """
-
- # special-case for auto/no explicit timezone
- if self.is_default(tz) :
- return self.default
-
- else :
- # pytz.timezone zone name
- return tz.zone
-
- def process (self, prefs, tz) :
- """
- If this timezone is given, simply build that. Otherwise, try and use TimezoneOffset, and if that fails,
- just return the default.
-
- None -> FixedOffsetTimezone/PREF_TIMEZONE_FALLBACK
- pytz.timezone -> pytz.timezone
- """
-
- # specific timezone set?
- if tz != self.default :
- return tz
-
- # fixed offset?
- elif prefs[timezone_offset] is not None :
- return utils.FixedOffsetTimezone(prefs[timezone_offset])
-
- # default
- else :
- return config.PREF_TIMEZONE_FALLBACK
-
-class ImageFont (Preference) :
- """
- Font for ImageFormatter
- """
-
- # set name
- name = 'image_font'
-
- def __init__ (self, font_dict, default_name) :
- """
- Use the given { name: (path, title) } dict and default the given name
- """
-
- self.font_dict = font_dict
- self.default = self.parse(default_name)
-
- def parse (self, name) :
- """
- name -> (name, path, title)
- """
-
- path, title = self.font_dict[name]
-
- return name, path, title
-
- def build (self, font_info) :
- """
- (name, path, title) -> name
- """
-
- name, path, title = font_info
-
- return name
-
-class ImageFontSize (urltree.URLIntegerType, Preference) :
- # set name, default
- name = 'image_font_size'
- default = config.PREF_IMAGE_FONT_SIZE_DEFAULT
-
- # XXX: constraints for valid values
-
-class Formatter (Preference) :
- """
- LogFormatter to use
- """
-
- # set name
- name = 'formatter'
-
- def __init__ (self, formatters, default) :
- """
- Use the given { name -> class LogFormatter } dict and default (a LogFormatter class)
- """
-
- self.formatters = formatters
- self.default = default
-
- def parse (self, fmt_name) :
- """
- fmt_name -> class LogFormatter
- """
-
- return self.formatters[fmt_name]
-
- def build (self, fmt_cls) :
- """
- class LogFormatter -> fmt_name
- """
-
- return fmt_cls.name
-
- def process (self, prefs, fmt_cls) :
- """
- class LogFormatter -> LogFormatter(tz, time_fmt, image_font.path)
- """
-
- # time stuff
- tz = prefs[timezone]
- time_fmt = prefs[time_format]
-
- # font stuff
- font_name, font_path, font_title = prefs[image_font]
- font_size = prefs[image_font_size]
-
- return fmt_cls(tz, time_fmt, font_path, font_size)
-
-class Count (urltree.URLIntegerType, Preference) :
- """
- Number of lines of log data to display per page
- """
-
- # set name
- name = "count"
-
- # default
- default = config.PREF_COUNT_DEFAULT
-
- def __init__ (self) :
- super(Count, self).__init__(allow_negative=False, allow_zero=False, max=config.PREF_COUNT_MAX)
-
-# and then build the Preferences object
-time_format = TimeFormat()
-date_format = DateFormat()
-timezone_offset = TimezoneOffset()
-timezone = Timezone()
-image_font = ImageFont(config.FORMATTER_IMAGE_FONTS, config.PREF_IMAGE_FONT_DEFAULT)
-image_font_size = ImageFontSize()
-formatter = Formatter(config.LOG_FORMATTERS, config.PREF_FORMATTER_DEFAULT)
-count = Count()
-
-preferences = Preferences([
- time_format,
- date_format,
- timezone_offset,
- timezone,
- image_font,
- image_font_size,
- formatter,
- count,
-])
-
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/qmsk/irclogs/__init__.py Sun Sep 13 01:15:56 2009 +0300
@@ -0,0 +1,10 @@
+"""
+ The irclogs.qmsk.net site is an IRC log browser
+"""
+
+# the URL mapper
+import urls
+
+# our RequestHandler
+handler = urls.mapper
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/qmsk/irclogs/channels.py Sun Sep 13 01:15:56 2009 +0300
@@ -0,0 +1,38 @@
+"""
+ Our list of LogChannels
+"""
+
+class ChannelList (object) :
+ """
+ The list of channels, and related methods
+ """
+
+
+ def __init__ (self, channel_list) :
+ """
+ Initialize with the given channel dict
+ """
+
+ self.channel_list = channel_list
+ self.channel_dict = dict((channel.id, channel) for channel in channel_list)
+
+ def lookup (self, channel_name) :
+ """
+ Looks up the LogChannel for the given name
+ """
+
+ return self.channel_dict[channel_name]
+
+ def dict (self) :
+ """
+ Returns a { name: LogChannel } dict
+ """
+ return self.channel_dict
+
+ def __iter__ (self) :
+ """
+ Iterate over our defined LogChannel objects
+ """
+
+ return iter(self.channel_list)
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/qmsk/irclogs/config.py Sun Sep 13 01:15:56 2009 +0300
@@ -0,0 +1,112 @@
+"""
+    Configurable defaults
+"""
+
+import os.path, pytz
+from log_parser import IrssiParser
+from log_channel import LogChannel
+from log_source import LogSourceDecoder, LogDirectory
+from log_formatter import IrssiFormatter, DebugFormatter
+from channels import ChannelList
+import log_formatter
+
+# build relative paths to the dir containing this file
+relpath = lambda path : os.path.join(os.path.dirname(__file__), path)
+
+### ###
+### Configuration ###
+### ###
+
+# timezone to use for logs
+LOG_TIMEZONE = pytz.timezone('Europe/Helsinki')
+
+# timestamp format for logfiles
+LOG_TIMESTAMP_FMT = '%H:%M:%S'
+
+# the decoder used for logfiles
+LOG_DECODER = LogSourceDecoder((
+ ('utf-8', 'strict'),
+ ('latin-1', 'replace'),
+))
+
+# log filename format
+LOG_FILENAME_FMT = '%Y-%m-%d'
+
+# the log parser that we use
+LOG_PARSER = IrssiParser(LOG_TIMEZONE, LOG_TIMESTAMP_FMT)
+#LOG_PARSER_FULLTS = IrssiParser(LOG_TIMEZONE, '%Y%m%d%H%M%S')
+
+# the statically defined channel list
+LOG_CHANNELS = ChannelList([
+ LogChannel('tycoon', "OFTC", "#tycoon",
+ LogDirectory(relpath('/home/spbot/irclogs/tycoon'), LOG_TIMEZONE, LOG_PARSER, LOG_DECODER, LOG_FILENAME_FMT)
+ ),
+
+ LogChannel('openttd', "OFTC", "#openttd",
+ LogDirectory(relpath('/home/spbot/irclogs/openttd'), LOG_TIMEZONE, LOG_PARSER, LOG_DECODER, LOG_FILENAME_FMT)
+ ),
+
+ LogChannel('test', "TEST", "#test",
+ LogDirectory(relpath('/home/spbot/irclogs/test'), LOG_TIMEZONE, LOG_PARSER, LOG_DECODER, LOG_FILENAME_FMT)
+ )
+])
+
+# URL to the hgweb installation for this code
+HGWEB_URL = "http://hg.qmsk.net/irclogs2"
+
+# path to the mercurial working copy
+HG_WC_PATH = "."
+
+# how to handle decode() errors for logfile lines
+LOG_SOURCE_DECODE_ERRORS = 'replace'
+
+# date format for URLs
+URL_DATE_FMT = '%Y-%m-%d'
+
+# month name format
+MONTH_FMT = '%B %Y'
+
+# timezone name format
+TIMEZONE_FMT = '%Z %z'
+
+# TTF fonts to use for drawing images
+FORMATTER_IMAGE_FONTS = {
+ # XXX: no unicode support
+ # 'default': (None, "Ugly default font" ),
+ 'ttf-dejavu-mono': ("/usr/share/fonts/truetype/ttf-dejavu/DejaVuSansMono.ttf", "DejaVu Sans Mono" ),
+ 'ttf-liberation-mono': ("/usr/share/fonts/truetype/ttf-liberation/LiberationMono-Regular.ttf", "Liberation Mono Regular" )
+}
+
+# available formatters
+LOG_FORMATTERS = {
+ 'irssi': IrssiFormatter,
+ 'debug': DebugFormatter,
+}
+
+# Cookie settings
+PREF_COOKIE_PATH = '/'
+PREF_COOKIE_EXPIRE_SECONDS = 1 * 365 * 24 * 60 * 60 # one year
+
+# default preferences
+PREF_TIME_FMT_DEFAULT = '%H:%M:%S'
+PREF_DATE_FMT_DEFAULT = '%Y-%m-%d'
+PREF_TIMEZONE_FALLBACK = pytz.utc
+PREF_FORMATTER_DEFAULT = IrssiFormatter
+PREF_COUNT_DEFAULT = 200
+PREF_COUNT_MAX = None
+PREF_IMAGE_FONT_DEFAULT = 'ttf-dejavu-mono'
+PREF_IMAGE_FONT_SIZE_DEFAULT = 12
+PREF_IMAGE_FONT_SIZE_MAX = 32
+
+# search line count options
+SEARCH_LINE_COUNT_OPTIONS = (
+ (50, 50),
+ (100, 100),
+ (200, 200),
+ (None, "∞"),
+)
+
+# search index database path
+SEARCH_INDEX_PATH = '/home/spbot/irclogs/search-index'
+SEARCH_AUTOINDEX_PATH = '/home/spbot/irclogs/search-autoindex'
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/qmsk/irclogs/error.py Sun Sep 13 01:15:56 2009 +0300
@@ -0,0 +1,183 @@
+"""
+ Build error messages
+"""
+
+import traceback, sys, cgi, urllib
+
+def truncate (msg, limit) :
+ """
+ Truncate the given message to <limit> chars
+ """
+
+ if len(msg) > limit :
+ return msg[:limit-3] + '...'
+
+ else :
+ return msg
+
+def build_link (title, url) :
+ return '<a href="%s">%s</a>' % (cgi.escape(url, True), cgi.escape(title))
+
+def build_error (exc_info=None, env=None) :
+ """
+ Dumps out a raw traceback of the given/current exception to stdout.
+
+    If env is given, it should be an environment dict, like under WSGI, and will be used to display additional info
+ about the request.
+
+ Returns a (status, content-type, body) tuple, with all components being non-unicode strs.
+ """
+
+ # default for exc_info is current exception
+ if not exc_info :
+ exc_info = sys.exc_info()
+
+ # request URL?
+ if env :
+ try :
+ from qmsk.web.http import request_url
+
+ url = request_url(env)
+
+ except :
+ # ignore
+ url = None
+ else :
+ url = None
+
+ # working copy path?
+ try :
+ from config import HG_WC_PATH, HGWEB_URL
+
+ wc_path = HG_WC_PATH
+ hgweb_url = HGWEB_URL
+
+ except :
+ # a good guess
+ wc_path = '.'
+ hgweb_url = None
+
+ # version?
+ try :
+ from version import version_string, version_link_hg
+
+ version = version_string(wc_path)
+
+ if hgweb_url :
+ version_href = version_link_hg(hgweb_url, wc_path)
+
+ else :
+ version_href = None
+
+ except :
+ version = None
+ version_href = None
+
+ # the exception type
+ exception_str = traceback.format_exception_only(*exc_info[:2])[-1]
+
+ # the exception traceback
+ traceback_lines = traceback.format_exception(*exc_info)
+
+    # XXX: make this configurable
+ trac_url = "http://projects.qmsk.net/irclogs2/trac"
+
+ # ticket list
+ trac_query = build_link("All tickets", "%s/query" % trac_url)
+
+ # submit ticket
+ submit_args = dict(type='defect')
+
+ # handle optional components
+ if url :
+ submit_args['url'] = url
+ trac_query_url = build_link("Same URL", "%s/query?url=%s" % (trac_url, urllib.quote(url)))
+ else :
+ trac_query_url = ""
+
+ if version :
+ submit_args['revision'] = version
+ trac_query_version = build_link("Same version", "%s/query?revision=%s" % (trac_url, urllib.quote(version)))
+
+ else :
+ trac_query_version = ""
+
+ if exception_str :
+ submit_args['summary'] = truncate(exception_str, 140)
+ trac_query_err = build_link("Same error", "%s/query?summary=%s" % (trac_url, urllib.quote(exception_str.rstrip())))
+
+ else :
+ trac_query_err = ""
+
+ if traceback_lines :
+ # this is big
+ submit_args['description'] = """\
+[Insert any additional information here]
+
+
+= Traceback =
+{{{
+%s
+}}}""" % ''.join(traceback_lines)
+
+ # the trac newticket URL
+ submit_url = "%s/newticket?%s" % (trac_url, '&'.join('%s=%s' % (urllib.quote(k), urllib.quote(v)) for k, v in submit_args.iteritems()))
+
+ # return
+ return ('500 Internal Server Error', 'text/html; charset=UTF-8', ("""\
+<html><head><title>500 Internal Server Error</title></head><body>
+<h1>Oops!</h1>
+<p>
+ An error occured, which was not logged, and was not reported to anybody. It might be your fault, or it might be mine.
+</p>
+
+<p>
+ You can try:
+ <ol style="list-style-type: lower-alpha">
+ <li><strong>Poking</strong> the administrator of this site to see if they respond</li>
+ <li><strong>Looking</strong> for similar issue tickets with:
+ <ul>
+ <li>%(trac_query)s</li>
+ <li>%(trac_query_url)s</li>
+ <li>%(trac_query_version)s</li>
+ <li>%(trac_query_err)s</li>
+ </ul>
+ </li>
+ <li><strong>Submitting</strong> a new ticket using the following link (quick & easy):</li>
+ </ol>
+</p>
+<pre>
+ <a href="%(submit_url)s">%(submit_url_short)s</a>
+</pre>
+
+<h2>Details:</h2>
+<p>The page you tried to request was:</p>
+<pre>
+ %(url)s
+</pre>
+
+<p>The software version is:</p>
+<pre>
+ %(version_link)s
+</pre>
+
+<p>The error was:</p>
+<pre>
+ %(exception)s
+</pre>
+
+<p>The traceback was:</p>
+<pre>%(traceback)s</pre>
+</body></html>""" % dict(
+ url = url if url else 'Unknown',
+ version_link = version_href if version_href else 'Unknown',
+ exception = truncate(exception_str, 512),
+ traceback = cgi.escape(''.join(' ' + line for line in traceback_lines)),
+ trac_query = trac_query,
+ trac_query_url = trac_query_url,
+ trac_query_version = trac_query_version,
+ trac_query_err = trac_query_err,
+ submit_url = submit_url,
+ submit_url_short = truncate(submit_url, 120)
+ )).encode('utf-8'))
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/qmsk/irclogs/handlers.py Sun Sep 13 01:15:56 2009 +0300
@@ -0,0 +1,292 @@
+"""
+ Our URL action handlers
+"""
+
+import datetime, calendar, pytz
+
+from qmsk.web import http, template
+
+import urls, channels, helpers
+import preferences as prefs
+from preferences import preferences
+import config, log_search
+
+# load templates from here
+templates = template.TemplateLoader("templates",
+ _helper_class = helpers.Helpers,
+ urls = urls,
+ channel_list = config.LOG_CHANNELS,
+ config = config,
+)
+
+# return a http.Response for the given text in the given format
+def _render_type (request, channel, lines, type, full_timestamps=False) :
+ """
+ Render the given LogLines as a http.Response in the given format, which is one of:
+ html - XXX: not supported
+ txt - Plaintext
+ png - PNG image
+ rss - RSS feed
+ """
+
+ # load related preferences
+ formatter = request.prefs['formatter']
+
+ kwargs = dict(
+ full_timestamps = full_timestamps
+ )
+
+ # we can render in various modes...
+ if type in ('html', None) :
+ xxx
+
+ elif type == 'txt' :
+ # plaintext
+ lines = formatter.format_txt(lines, **kwargs)
+
+ # build data
+ data = '\n'.join(data for line, data in lines)
+
+ return http.Response(data, 'text/plain')
+
+ elif type == 'png' :
+ # PNG image
+ png_data = formatter.format_png(lines, **kwargs)
+
+ return http.Response(png_data, 'image/png', charset=None)
+
+ elif type == 'rss' :
+ # RSS feed
+ rss_data = formatter.format_rss(lines, **kwargs)
+
+ # XXX: fix to render as unicode?
+ return http.Response(rss_data, 'application/rss+xml', charset=None)
+
+ else :
+ raise http.ResponseError("Unrecognized type: %r" % (type, ))
+
+def _render_date (request, channel, date, lines, type, count, page, max) :
+ """
+ Render the given LogLines as a http.Response for channel_date
+ """
+
+ # type?
+ if type :
+ # special type
+ return _render_type(request, channel, lines, type)
+
+ else :
+ # format HTML
+ lines = request.prefs['formatter'].format_html(lines)
+
+ # render
+ return templates.render_to_response("channel_date",
+ req = request,
+ prefs = request.prefs,
+ channel = channel,
+ date = date,
+ count = count,
+ page = page,
+ max = max,
+ lines = lines,
+
+ # for prev/next date
+ date_next = channel.source.get_next_date(date),
+ date_prev = channel.source.get_prev_date(date),
+ )
+
+@preferences.handler()
+def index (request) :
+ """
+ The topmost index page, display a list of available channels, perhaps some general stats
+ """
+
+ return templates.render_to_response("index",
+ req = request,
+ prefs = request.prefs,
+ )
+
+# XXX: fix this namespace crap
+@preferences.handler()
+def preferences_ (request) :
+ """
+ Preferences editor
+ """
+
+ # POST?
+ if request.is_post() :
+ # update any modified preferences
+ for pref in preferences.pref_list :
+ # get the POST'd value, default = None
+ post_value = request.get_post(pref.name, None)
+
+ # skip non-specified values
+ # XXX: this is to not clobber timezone_offset to None
+ if post_value is None :
+ continue
+
+ # parse the POST'd value, None -> default
+ new_value = request.prefs.parse(pref, post_value)
+
+ # update if given and changed
+ if new_value != request.prefs[pref] :
+ request.prefs.set(pref.name, new_value)
+
+ # render
+ return templates.render_to_response("preferences",
+ req = request,
+ prefs = request.prefs,
+ preferences = prefs,
+ )
+
+def channel_select (request, channel) :
+ """
+ Redirect to the appropriate channel_view
+ """
+
+ return http.Redirect(urls.channel.build(request, channel=channel))
+
+@preferences.handler(prefs.formatter)
+def channel_last (request, channel, count, formatter, type=None) :
+ """
+ The main channel view page, displaying the most recent lines
+ """
+
+ # get latest events
+ lines = channel.source.get_latest(count)
+
+ # type?
+ if type :
+ # other format
+ return _render_type(request, channel, lines, type)
+
+ else :
+ # format HTML
+ lines = formatter.format_html(lines)
+
+ # render page
+ return templates.render_to_response("channel_last",
+ req = request,
+ prefs = request.prefs,
+ channel = channel,
+ count = count,
+ lines = lines,
+ )
+
+@preferences.handler(prefs.formatter, prefs.timezone, prefs.count)
+def channel_link (request, channel, timestamp, formatter, timezone, count, type=None) :
+ """
+ Display channel_date for specific UTC timestamp
+ """
+
+ # convert timestamp to user's timezone
+ timestamp = timestamp.astimezone(timezone)
+
+ # get correct day's correct page of lines
+ page, max, lines = channel.source.get_date_paged(timestamp, count)
+
+ # render channel_date
+ return _render_date (request, channel, timestamp, lines, type, count, page, max)
+
+@preferences.handler(prefs.timezone)
+def channel_calendar (request, channel, year, month, timezone) :
+ """
+    Display a list of available logs for some month
+ """
+
+ # current date as default
+ now = timezone.localize(datetime.datetime.now())
+
+ # target year/month
+ target = timezone.localize(datetime.datetime(
+ year = year if year else now.year,
+ month = month if month else now.month,
+ day = 1
+ ))
+
+ # display calendar
+ return templates.render_to_response("channel_calendar",
+ req = request,
+ prefs = request.prefs,
+ channel = channel,
+ month = target,
+ )
+
+@preferences.handler(prefs.count, prefs.timezone)
+def channel_date (request, channel, date, count, timezone, page=1, type=None) :
+ """
+ Display all log data for the given date
+ """
+
+ # convert date to user's timezone
+ date = timezone.localize(date)
+
+# print
+# print "channel_date: date=%s" % date
+
+ # get that day's events, either paged or not
+ if page :
+ page, max, lines = channel.source.get_date_paged(date, count, page)
+
+ else :
+ lines = channel.source.get_date(date)
+ max = None
+
+ # render channel_date
+ return _render_date (request, channel, date, lines, type, count, page, max)
+
+@preferences.handler(prefs.formatter, prefs.count)
+def channel_search (request, channel, formatter, count, q=None, page=1, max=1, type=None, t=None) :
+ """
+ Display the search form for the channel for GET, or do the search for POST.
+ """
+
+ # calculate skip offset from page/count
+ skip = (page - 1) * count
+
+ # got a search query?
+ if q :
+ # attribute targets
+ targets = dict(('search_%s' % target, True) for target in t if target in ('msg', 'nick')) if t else {}
+
+ try :
+ # do search
+ lines = log_search.get_index().search_simple(channel, q, count, skip, **targets)
+
+ # update max?
+ if max and page > max :
+ max = page
+
+ except log_search.NoResultsFound :
+ # no results
+ lines = None
+
+ else :
+ # just display the search form
+ lines = None
+
+ # type?
+ if type and lines :
+ # special type
+ return _render_type(request, channel, lines, type, full_timestamps=True)
+
+ else :
+ # format lines to HTML if any
+ if lines :
+ # format
+ lines = formatter.format_html(lines, full_timestamps=True)
+
+ # render page
+ return templates.render_to_response("channel_search",
+ req = request,
+ prefs = request.prefs,
+ channel = channel,
+ search_query = q,
+ search_targets = t,
+ count = count,
+ page = page,
+ skip = skip,
+ max = max,
+ lines = lines,
+ )
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/qmsk/irclogs/helpers.py Sun Sep 13 01:15:56 2009 +0300
@@ -0,0 +1,243 @@
+"""
+ Some additional helpers
+"""
+
+import datetime
+import calendar as _calendar
+
+import qmsk.web.helpers
+
+import preferences, urls, config, version
+
+class Helpers (qmsk.web.helpers.Helpers) :
+ """
+ Our set of helpers, inheriting from base helpers
+ """
+
+    # set constructor...
+ set = set
+
+ # reference to calendar instance
+ calendar = _calendar.Calendar()
+
+ # list of (month_num, month_name) for the months in the year
+ months = list(enumerate(_calendar.month_name))[1:]
+
+ def version_link (self) :
+ """
+ Returns a <a href> representing this version of the software
+ """
+
+ return version.version_link_hg(config.HGWEB_URL, config.HG_WC_PATH)
+
+ def tz_name (self, tz) :
+ """
+ Returns a string describing the given timezone
+ """
+
+ return self.now().strftime(config.TIMEZONE_FMT)
+
+ def fmt_month (self, date) :
+ """
+ Formats a month
+ """
+
+ return date.strftime(config.MONTH_FMT)
+
+ def fmt_weekday (self, wday) :
+ """
+ Formats an abbreviated weekday name
+ """
+
+ return _calendar.day_abbr[wday]
+
+ def build_date (self, month, mday) :
+ """
+ Returns a datetime.datetime for the given (month.year, month.month, mday)
+ """
+
+ return self.ctx['prefs'][preferences.timezone].localize(datetime.datetime(month.year, month.month, mday))
+
+ def now (self) :
+ """
+ Build current time
+ """
+
+ return self.ctx['prefs'][preferences.timezone].localize(datetime.datetime.now())
+
+ def today (self) :
+ """
+ Build today's date
+ """
+
+ return self.now().date()
+
+ def is_today (self, dt) :
+ """
+ Checks if the given datetime.datetime is today
+ """
+
+ # compare with current date
+ return dt.date() == self.today()
+
+ def is_this_month (self, month) :
+ """
+ Checks the given month is the current month
+ """
+
+ today = self.today()
+
+ return (month.year == today.year and month.month == today.month)
+
+ @staticmethod
+ def _wrap_year (year, month) :
+ """
+ Wraps month to between [1, 12], spilling overflow/underflow by to year.
+
+ Returns (year, month)
+ """
+
+ # underflow?
+ if month == 0 :
+ # wrap to previous year
+ return (year - 1, 12)
+
+ # overflow?
+ elif month == 13 :
+ # wrap to next year
+ return (year + 1, 1)
+
+ # sane value
+ elif 1 <= month <= 12 :
+ return (year, month)
+
+ # insane value
+ else :
+ assert False, "invalid year/month: %d/%d" % (year, month)
+
+ def prev_month (self, month) :
+ """
+ Returns the month preceding the given one (as a datetime.datetime)
+ """
+
+ # previous month
+ y, m = self._wrap_year(month.year, month.month - 1)
+
+ # build datetime
+ return datetime.datetime(year=y, month=m, day=1, tzinfo=month.tzinfo)
+
+ def next_month (self, month) :
+ """
+ Returns the month following the given one (as a datetime.datetime)
+ """
+
+        # next month
+ y, m = self._wrap_year(month.year, month.month + 1)
+
+ # build datetime
+ return datetime.datetime(year=y, month=m, day=1, tzinfo=month.tzinfo)
+
+ def fmt_time (self, time=None) :
+ """
+ Format given time, or current time
+ """
+
+ # defaults
+ if not time :
+ time = self.now()
+
+ return time.strftime(self.ctx['prefs'][preferences.time_format])
+
+ def fmt_date (self, date=None) :
+ """
+ Format given date, or current date
+ """
+
+ # defaults
+ if not date :
+ date = self.now()
+
+ return date.strftime(self.ctx['prefs'][preferences.date_format])
+
+ def url (self, url, **params) :
+ """
+ Build URL with our request object
+ """
+
+ return url.build(self.ctx['req'], **params)
+
+ # old name
+ build_url = url
+
+ def utc_timestamp (self, dtz) :
+ """
+ Build an UTC timestamp from the given datetime
+ """
+
+ return urls.types['ts'].build(dtz)
+
+ def skip_next (self, count, skip) :
+ """
+ Return skip offset for next page
+ """
+
+ return count + skip
+
+ def skip_page (self, count, page) :
+ """
+ Skip to page
+ """
+
+ if page :
+ return count * page
+
+ else :
+ return None
+
+ def skip_prev (self, count, skip) :
+ """
+ Return skip offset for previous page, None for first page
+ """
+
+ if skip > count :
+ return skip - count
+
+ else :
+ return None
+
+ def max (self, *values) :
+ """
+ Returns the largest of the given values
+ """
+
+ return max(values)
+
+ def select_options (self, key_values, selected_key=None) :
+ """
+ Render a series of <option> tags for <select>.
+
+ The given key_values is an iterable of (key, value) pairs, key may be None if it's the same as value.
+ """
+
+ return '\n'.join(
+ '\t<option%s%s>%s</option>' % (
+ ' value="%s"' % key if key is not None else '',
+ ' selected="selected"' if (key if key is not None else value) == selected_key else '',
+ value
+ ) for key, value in key_values
+ )
+
+ def prev_date (self, date) :
+ """
+ Returns the previous date for the given datetime-date
+ """
+
+ return datetime.datetime(date.year, date.month, date.day, tzinfo=date.tzinfo) - datetime.timedelta(days=1)
+
+ def next_date (self, date) :
+ """
+        Returns the next date for the given datetime-date
+ """
+
+ return datetime.datetime(date.year, date.month, date.day, tzinfo=date.tzinfo) + datetime.timedelta(days=1)
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/qmsk/irclogs/log_channel.py Sun Sep 13 01:15:56 2009 +0300
@@ -0,0 +1,54 @@
+"""
+ A channel represents a series of log events, stored in some log source
+"""
+
+import log_search
+
+class LogChannel (object) :
+ """
+ A single IRC channel, logged to some specific place
+ """
+
+ def __init__ (self, id, network, name, source) :
+ """
+ Initialize this channel from the given identifier key, network name, channel name, and LogSource
+ """
+
+ # store
+ self.id = id
+ self.network = network
+ self.name = name
+ self.source = source
+
+ # bind source
+ self.source.bind_channel(self)
+
+ @property
+ def title (self) :
+ """
+ Title is 'Network - #channel'
+ """
+
+ return "%s - %s" % (self.network, self.name)
+
+ def search (self, query) :
+ """
+ Perform a search on this channel, returning a sequence of LogLines
+ """
+
+ return log_search.index.search_simple(self, query)
+
+ def __str__ (self) :
+ """
+ Returns self.title
+ """
+
+ return self.title
+
+ def __repr__ (self) :
+ """
+ Uses self.id
+ """
+
+ return "LogChannel(%s)" % (self.id, )
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/qmsk/irclogs/log_formatter.py Sun Sep 13 01:15:56 2009 +0300
@@ -0,0 +1,257 @@
+"""
+ Format LogLines into some other representation
+"""
+
+import re, xml.sax.saxutils
+
+from log_line import LogTypes
+from log_formatter_pil import PILImageFormatter
+from log_formatter_rss import RSSFormatter
+
+class LogFormatter (object) :
+ """
+ Provides a method to format series of LogLines into various output formats, with varying themes.
+ """
+
+ # machine-readable name
+ name = None
+
+ # human-readable name
+ title = None
+
+ ## parameters
+ # use a fixed-width font for HTML output
+ html_fixedwidth = True
+
+ def __init__ (self, tz, timestamp_fmt, img_ttf_path, img_font_size) :
+ """
+        Initialize to format timestamps with the given timezone and timestamp format.
+
+ Use the given TTF font to render image text with the given size, if given, otherwise, a default one.
+ """
+
+ # store
+ self.tz = tz
+ self.timestamp_fmt = timestamp_fmt
+ self.img_ttf_path = img_ttf_path
+ self.img_font_size = img_font_size
+
+        # XXX: hardcoded
+ self.date_fmt = '%Y-%m-%d'
+
+ def _format_line_text (self, line, template_dict, type=None, full_timestamp=False, **extra) :
+ """
+ Format the given line as text, using the given { type: string template } dict.
+
+ If type is given, then it overrides line.type
+
+ Any additional keyword args will also be available for the template to use
+ """
+
+ # default type?
+ if type is None :
+ type = line.type
+
+ # look up the template
+ if type in template_dict :
+ template = template_dict[type]
+
+ else :
+ raise Exception("Format template not defined for type: %s" % LogTypes.name_from_code(type))
+
+ # convert timestamp into display timezone
+ dtz = line.timestamp.astimezone(self.tz)
+
+ # full timestamps?
+ if full_timestamp :
+ # XXX: let the user define a 'datetime' format instead?
+ timestamp_fmt = self.date_fmt + ' ' + self.timestamp_fmt
+
+ else :
+ timestamp_fmt = self.timestamp_fmt
+
+ # breakdown source
+ source_nickname, source_username, source_hostname, source_chanflag = line.source
+ target_nickname = line.target
+
+ # format with dict
+ return template % dict(
+ channel_name = line.channel.name,
+ datetime = dtz.strftime('%a %b %d %H:%M:%S %Y'),
+ date = dtz.strftime(self.date_fmt),
+ timestamp = dtz.strftime(timestamp_fmt),
+ source_nickname = source_nickname,
+ source_username = source_username,
+ source_hostname = source_hostname,
+ source_chanflag = source_chanflag,
+ target_nickname = target_nickname,
+ message = line.data,
+ **extra
+ )
+
+ def format_txt (self, lines, full_timestamps=False) :
+ """
+ Format given lines as plaintext.
+
+ If full_timestamps is given, the output will contain full timestamps with both date and time.
+
+ No trailing newlines.
+ """
+
+ abstract
+
+ def format_html (self, lines, full_timestamps=False) :
+ """
+ Format as HTML.
+
+ See format_txt for information about arguments
+ """
+
+ abstract
+
+ def format_png (self, lines, full_timestamps=False) :
+ """
+ Format as a PNG image, returning the binary PNG data
+ """
+
+ abstract
+
+ def format_rss (self, lines, full_timestamps=False) :
+ """
+ Format as an XML RSS document
+ """
+
+ abstract
+
+class BaseHTMLFormatter (LogFormatter) :
+ """
+ Implements some HTML-formatting utils
+ """
+
+ # parameters
+ html_fixedwidth = True
+
+ # regexp to match URLs
+ URL_REGEXP = re.compile(r"http://\S+")
+
+ def _process_links (self, line) :
+ """
+        Processes the rendered line, adding in <a href>'s for things that look like URLs, returning the new line.
+
+ The line should already be escaped
+ """
+
+ def _encode_url (match) :
+ # encode URL
+ url_html = match.group(0)
+ url_link = xml.sax.saxutils.unescape(url_html)
+
+ return '<a href="%(url_link)s">%(url_html)s</a>' % dict(url_link=url_link, url_html=url_html)
+
+ return self.URL_REGEXP.sub(_encode_url, line)
+
+ def format_html (self, lines, **kwargs) :
+ """
+ Just uses format_txt, but processes links, etc
+ """
+
+ # format using IrssiTextFormatter
+ for line, txt in self.format_txt(lines, **kwargs) :
+ # escape HTML
+ html = xml.sax.saxutils.escape(txt)
+
+ # process links
+ html = self._process_links(html)
+
+ # yield
+ yield line, html
+
+
+class IrssiTextFormatter (RSSFormatter, PILImageFormatter, LogFormatter) :
+ """
+ Implements format_txt for irssi-style output
+ """
+
+ # format definitions by type
+ __FMT = {
+ LogTypes.RAW : "%(timestamp)s %(data)s",
+ LogTypes.LOG_OPEN : "--- Log opened %(datetime)s",
+ LogTypes.LOG_CLOSE : "--- Log closed %(datetime)s",
+ 'DAY_CHANGED' : "--- Day changed %(date)s",
+
+ LogTypes.MSG : "%(timestamp)s <%(source_chanflag)s%(source_nickname)s> %(message)s",
+ LogTypes.NOTICE : "%(timestamp)s -%(source_nickname)s- %(message)s",
+ LogTypes.ACTION : "%(timestamp)s * %(source_nickname)s %(message)s",
+
+ LogTypes.JOIN : "%(timestamp)s -!- %(source_nickname)s [%(source_username)s@%(source_hostname)s] has joined %(channel_name)s",
+ LogTypes.PART : "%(timestamp)s -!- %(source_nickname)s [%(source_username)s@%(source_hostname)s] has left %(channel_name)s [%(message)s]",
+ LogTypes.KICK : "%(timestamp)s -!- %(target_nickname)s was kicked from %(channel_name)s by %(source_nickname)s [%(message)s]",
+ LogTypes.MODE : "%(timestamp)s -!- mode/%(channel_name)s [%(message)s] by %(source_nickname)s",
+
+ LogTypes.NICK : "%(timestamp)s -!- %(source_nickname)s is now known as %(target_nickname)s",
+ LogTypes.QUIT : "%(timestamp)s -!- %(source_nickname)s [%(source_username)s@%(source_hostname)s] has quit [%(message)s]",
+
+ LogTypes.TOPIC : "%(timestamp)s -!- %(source_nickname)s changed the topic of %(channel_name)s to: %(message)s",
+ 'TOPIC_UNSET' : "%(timestamp)s -!- Topic unset by %(source_nickname)s on %(channel_name)s",
+
+ LogTypes.SELF_NOTICE: "%(timestamp)s -%(source_nickname)s- %(message)s",
+ LogTypes.SELF_NICK : "%(timestamp)s -!- %(source_nickname)s is now known as %(target_nickname)s",
+
+ LogTypes.NETSPLIT_START :
+ "%(timestamp)s -!- Netsplit %(source_hostname)s <-> %(target_nickname)s quits: %(_netsplit_targets)s",
+ LogTypes.NETSPLIT_END :
+ "%(timestamp)s -!- Netsplit over, joins: %(_netsplit_targets)s",
+ }
+
+ def format_txt (self, lines, full_timestamps=False) :
+ # ...handle each line
+ for line in lines :
+ # extra args
+ extra = {}
+
+ # default to line.type
+ type = line.type
+
+ # special formatting for unset-Topic
+ if line.type == LogTypes.TOPIC and line.data is None :
+ type = 'TOPIC_UNSET'
+
+ # format netsplit stuff
+ elif line.type & LogTypes._NETSPLIT_MASK :
+ # format the netsplit-targets stuff
+ extra['_netsplit_targets'] = line.data
+
+ # using __TYPES
+ yield line, self._format_line_text(line, self.__FMT, type, full_timestamps, **extra)
+
+class IrssiFormatter (BaseHTMLFormatter, IrssiTextFormatter) :
+ """
+ Implements plain black-and-white irssi-style formatting
+ """
+
+ # name
+ name = 'irssi'
+ title = "Irssi (plain)"
+
+class DebugFormatter (BaseHTMLFormatter) :
+ """
+ Implements a raw debug-style formatting of LogLines
+ """
+
+ # name
+ name = 'debug'
+ title = "Raw debugging format"
+
+ def format_txt (self, lines, full_timestamps=False) :
+ # iterate
+ for line in lines :
+ # just dump
+ yield line, unicode(line)
+
+def by_name (name) :
+ """
+ Lookup and return a class LogFormatter by name
+ """
+
+ return FORMATTERS[name]
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/qmsk/irclogs/log_formatter_pil.py Sun Sep 13 01:15:56 2009 +0300
@@ -0,0 +1,81 @@
+"""
+ Use of PIL to render the image formatting stuff
+"""
+
+from PIL import Image, ImageDraw, ImageFont
+
+from cStringIO import StringIO
+
class PILImageFormatter (object) :
    """
    Mixin for LogFormatter that implements the basic image-rendering operations on top of format_txt
    """

    # the lazily-loaded ImageFont, cached after the first _load_font call
    font = None

    # vertical gap between rendered lines, in pixels
    LINE_SPACING = 1

    def _load_font (self) :
        """
        Return the font to render with, loading it on first use.

        Uses the configured img_ttf_path for a TrueType font of size img_font_size,
        or PIL's built-in default font if no path is configured.
        """

        if self.font :
            # already loaded
            pass

        elif self.img_ttf_path :
            # load truetype with configured size
            self.font = ImageFont.truetype(self.img_ttf_path, self.img_font_size)

        else :
            # PIL's built-in bitmap font
            self.font = ImageFont.load_default()

        return self.font

    def format_png (self, lines, **kwargs) :
        """
        Build and return a PNG image of the given lines, using format_txt.

        Raises ValueError if format_txt yields no lines at all.
        """

        # load font
        font = self._load_font()

        # build list of plain-text line data
        texts = list(data for line, data in self.format_txt(lines, **kwargs))

        # guard empty input explicitly; otherwise max() below fails with a confusing message
        if not texts :
            raise ValueError("no lines to render")

        # per-line (width, height) in pixels
        text_sizes = [font.getsize(text) for text in texts]

        # image is as wide as the widest line, and tall enough to stack all lines + spacing
        img_width = max(w for w, h in text_sizes)
        img_height = sum(h + self.LINE_SPACING for w, h in text_sizes)

        # create new greyscale ('L') image with a white (0xff) background
        img = Image.new('L', (img_width, img_height), 0xff)

        # drawer
        draw = ImageDraw.Draw(img)

        # vertical position of the next line
        offset_y = 0

        # draw the lines, top to bottom
        for text, (w, h) in zip(texts, text_sizes) :
            draw.text((0, offset_y), text, font=font)

            # advance past this line plus spacing
            offset_y += h + self.LINE_SPACING

        # render the PNG into an in-memory buffer
        buf = StringIO()
        img.save(buf, 'png')

        # return the raw PNG bytes
        return buf.getvalue()
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/qmsk/irclogs/log_formatter_rss.py Sun Sep 13 01:15:56 2009 +0300
@@ -0,0 +1,41 @@
+"""
+ Uses PyRSS2Gen to generate XML RSS documents
+"""
+
+import PyRSS2Gen as RSS2Gen
+import datetime, pytz
+
class RSSFormatter (object) :
    """
    Mixin for LogFormatter that implements the basic RSS-rendering stuff on top of format_html
    """

    def format_rss (self, lines, **kwargs) :
        """
        Render the given lines as an RSS2 XML document, building one item per format_html result
        """

        # one RSSItem per formatted line
        items = []

        for line, html_data in self.format_html(lines, **kwargs) :
            items.append(RSS2Gen.RSSItem(
                # use the formatted HTML data as the title
                title = html_data,

                # timestamp
                pubDate = line.timestamp.astimezone(pytz.utc),

                # link
                link = "http://xxx/",
            ))

        # build the RSS2 object and return the XML
        return RSS2Gen.RSS2(
            title = "IRC RSS feed",
            link = "http://irclogs.qmsk.net/",
            description = "A stupid RSS feed that nobody sane would ever use",

            # XXX: GMT
            lastBuildDate = datetime.datetime.utcnow(),

            items = items,
        ).to_xml('utf8')
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/qmsk/irclogs/log_line.py Sun Sep 13 01:15:56 2009 +0300
@@ -0,0 +1,186 @@
+"""
+ An IRC logfile consists of a series of lines/events
+"""
+
class LogTypes :
    """
    Definitions of the various LogLines types:

        LogTypes.RAW
        LogTypes.LOG_OPEN
        LogTypes.LOG_CLOSE

        LogTypes.MSG
        LogTypes.NOTICE
        LogTypes.ACTION

        LogTypes.JOIN
        LogTypes.PART
        LogTypes.KICK
        LogTypes.MODE

        LogTypes.NICK
        LogTypes.QUIT

        LogTypes.TOPIC

        LogTypes.SELF_NOTICE
        LogTypes.SELF_NICK

    The attributes themselves are attached to this class by the module-level
    loop below, one per (name, code) pair in LIST.
    """

    # list of (name, code) LogType values; codes are grouped by high nibble
    # (0x0X special, 0x1X messages, 0x2X user-channel, 0x3X user status,
    #  0x4X channel status, 0x5X own actions, 0x6X netsplits)
    LIST = [
        ## special
        # unknown type, may or may not have a timestamp, no source, only data
        ('RAW', 0x01),

        # log opened
        ('LOG_OPEN', 0x02),

        # log closed
        ('LOG_CLOSE', 0x03),

        ## messages
        # <source> sent message <data> to <channel>
        ('MSG', 0x10),

        # <source> sent notice with message <data> to <channel>
        ('NOTICE', 0x11),

        # <source> sent CTCP action with message <data> to <channel>
        ('ACTION', 0x12),

        ## user-channel stats
        # <source> joined <channel>
        ('JOIN', 0x21),

        # <source> left <channel> with message <data>
        ('PART', 0x22),

        # <source> kicked <target> from <channel> with message <data>
        ('KICK', 0x25),

        # <source> changed modes on <channel> with modestring <data>
        ('MODE', 0x26),

        ## user status
        # <source> changed nickname to <target>
        ('NICK', 0x31),

        # <source> quit the network with quit-message <data>
        ('QUIT', 0x32),

        ## general channel status
        # <source> changed the topic of <channel> to <data>
        # data may be None if the topic was unset
        ('TOPIC', 0x41),

        ## our own actions
        # we (<source>) sent a notice with message <data> to <channel>
        ('SELF_NOTICE', 0x51),

        # we (<source>) changed nickname to <target>
        ('SELF_NICK', 0x52),

        ## slightly weirder bits
        # netsplit between <source_hostname> and <target_hostname>, <data> is a space-separated list of <chanflags><nickname>s affected
        # the last item in the list of nicknames may also be of the form "+<count>", where count is the number of additional, but hidden, nicknames affected
        ('NETSPLIT_START', 0x61),

        # netsplit over, <data> is a list of users affected, see NETSPLIT_START
        # (0x062 == 0x62, keeping it inside the 0x60 netsplit block)
        ('NETSPLIT_END', 0x062),
    ]

    @classmethod
    def name_from_code (cls, code) :
        """
        Looks up a LogType name by code.

        Rebuilds the reverse code->name mapping on each call; raises KeyError
        for unknown codes.
        """

        return dict((type, name) for name, type in cls.LIST)[code]
+
# apply as attributes
# (attach each (name, code) pair from LogTypes.LIST as a LogTypes.<NAME> class attribute)
for name, code in LogTypes.LIST :
    setattr(LogTypes, name, code)

# masks
# both NETSPLIT_START (0x61) and NETSPLIT_END (0x62) match `code & _NETSPLIT_MASK`
LogTypes._NETSPLIT_MASK = 0x60
+
class LogLine (object) :
    """
    An event on some specific channel
    """

    # the LogChannel this line belongs to
    channel = None

    # the offset, only guaranteed to be unique for a specific channel and date
    offset = None

    # the event type, as defined in LogTypes
    type = None

    # the UTC timestamp of the event
    timestamp = None

    # the source, this should be a (nickname, username, hostname, chanflags) tuple
    source = None

    # possible target nickname for certain types (kick, nick)
    target = None

    # associated data (message, etc)
    data = None

    def __init__ (self, channel, offset, type, timestamp, source, target, data) :
        """
        Store the given values on this instance
        """

        self.channel = channel
        self.offset = offset
        self.type = type
        self.timestamp = timestamp
        self.source = source
        self.target = target
        self.data = data

    def format_type (self) :
        """
        Formats type as a string code
        """

        return LogTypes.name_from_code(self.type)

    def format_source (self) :
        """
        Formats source as [<chanflags>][<nickname>][!<username>][@<hostname>], omitting those parts that are missing.

        If all parts are None, this returns the empty string
        """

        nick, user, host, flags = self.source

        # collect the present parts in display order
        parts = []

        # a single-space chanflag counts as absent
        if flags and flags != ' ' :
            parts.append(flags)

        if nick :
            parts.append(nick)

        if user :
            parts.append('!' + user)

        if host :
            parts.append('@' + host)

        return ''.join(parts)

    def __unicode__ (self) :
        # tab-separated dump of every field
        fields = (
            self.channel.name,
            str(self.offset),
            self.format_type(),
            str(self.timestamp),
            self.format_source(),
            str(self.target),
            unicode(self.data),
        )

        return '\t'.join(fields)

    def __repr__ (self) :
        return "LogLine(%r, %s, %-12s, %s, %-35s, %-10s, %r)" % (
            self.channel, self.offset, self.format_type(), self.timestamp, self.format_source(), self.target, self.data
        )
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/qmsk/irclogs/log_parser.py Sun Sep 13 01:15:56 2009 +0300
@@ -0,0 +1,233 @@
+"""
+ Parse log data into log_events
+"""
+
+import re
+import datetime
+
+from log_line import LogTypes, LogLine
+
class LogParseError (Exception) :
    """
    Parsing some line failed
    """

    def __init__ (self, line, offset, message) :
        # describe the offending line, its offset and the failure in one message
        detail = "%r@%s: %s" % (line, offset, message)

        super(LogParseError, self).__init__(detail)
+
class LogParser (object) :
    """
    Abstract interface
    """

    def __init__ (self, tz, timestamp_fmt="%H:%M:%S") :
        """
        Setup the parser to use the given format for line timestamps, which are of the given timezone
        """

        # timezone the line timestamps are in
        self.tz = tz

        # strptime format for line timestamps
        self.timestamp_fmt = timestamp_fmt

    def parse_lines (self, channel, lines, date=None, starting_offset=None) :
        """
        Parse the given (iterable) lines of unicode text into a LogEvent, no trailing newline.

        Channel is the LogChannel that these lines belong to.

        Offset is the starting offset, and may be None to not use it.

        Giving date lets the parser build full timestamps, otherwise, unless line timestamps have full date
        information, event timestamps will have a date component of 1900/1/1.
        """

        # subclasses must override; referencing this undefined name raises NameError
        abstract
+
class IrssiParser (LogParser) :
    """
    A parser for irssi logfiles.

    Matches each line against an ordered table of per-type regular expressions and
    builds LogLines from the named groups of the first pattern that matches.
    """

    # timestamp prefix, with trailing space
    _TS = r'(?P<timestamp>[a-zA-Z0-9: ]+[a-zA-Z0-9])\s*'

    # subexpression parts, shared between the patterns below
    _NICK = r'(?P<nickname>.+?)'
    _NICK2 = r'(?P<nickname2>.+?)'
    _TARGET = r'(?P<target>.+?)'
    _CHAN = r'(?P<channel>.+?)'
    _CHAN2 = r'(?P<channel2>.+?)'
    _USERHOST = r'(?P<username>.*?)@(?P<hostname>.*?)'
    _MSG = r'(?P<message>.*)'
    _SRV1 = r'(?P<server1>.+?)'
    _SRV2 = r'(?P<server2>.+?)'

    # regular expressions for matching lines, by type; tried in order, first match wins
    TYPE_EXPRS = (
        ( LogTypes.LOG_OPEN, r'--- Log opened (?P<datetime>.+)' ),
        ( LogTypes.LOG_CLOSE, r'--- Log closed (?P<datetime>.+)' ),
        ( LogTypes.MSG, _TS + r'<(?P<flags>.)' + _NICK + '> ' + _MSG ),
        ( LogTypes.NOTICE, _TS + r'-' + _NICK + ':' + _CHAN + '- ' + _MSG ),
        ( LogTypes.ACTION, _TS + r'\* ' + _NICK + ' ' + _MSG ),
        ( LogTypes.JOIN, _TS + r'-!- ' + _NICK + ' \[' + _USERHOST + '\] has joined ' + _CHAN ),
        ( LogTypes.PART, _TS + r'-!- ' + _NICK + ' \[' + _USERHOST + '\] has left ' + _CHAN + ' \[(?P<message>.*?)\]' ),
        ( LogTypes.KICK, _TS + r'-!- ' + _TARGET + ' was kicked from ' + _CHAN + ' by ' + _NICK + ' \[(?P<message>.*?)\]' ),
        # XXX: use hostname instead of nickname for ServerMode
        ( LogTypes.MODE, _TS + r'-!- (mode|ServerMode)/' + _CHAN + ' \[(?P<mode>.+?)\] by (?P<nickname>\S+)' ),
        ( LogTypes.NICK, _TS + r'-!- ' + _NICK + ' is now known as (?P<target>\S+)' ),
        ( LogTypes.QUIT, _TS + r'-!- ' + _NICK + ' \[' + _USERHOST + '\] has quit \[(?P<message>.*?)\]' ),
        ( LogTypes.TOPIC, _TS + r'-!- (' + _NICK + ' changed the topic of ' + _CHAN + ' to: (?P<topic>.*)|Topic unset by ' + _NICK2 + ' on ' + _CHAN2 + ')' ),

        ( LogTypes.SELF_NOTICE, _TS + r'\[notice\(' + _CHAN + '\)\] ' + _MSG ),
        ( LogTypes.SELF_NICK, _TS + r'-!- You\'re now known as (?P<target>\S+)' ),

        ( LogTypes.NETSPLIT_START, _TS + r'-!- Netsplit ' + _SRV1 + ' <-> ' + _SRV2 + ' quits: (?P<nick_list>[^(]+)( \(\+(?P<count>\d+) more,\S+\))?'),
        ( LogTypes.NETSPLIT_END, _TS + r'-!- Netsplit over, joins: (?P<nick_list>[^(]+)( \(\+(?P<count>\d+) more\))?' ),

        # pseudo-type, handled internally by parse_line (updates the working date)
        ( 'DAY_CHANGED', r'--- Day changed (?P<date>.+)' ),
    )

    # precompile the table once at class-definition time
    TYPE_REGEXES = [(type, re.compile(expr)) for type, expr in TYPE_EXPRS]

    def parse_line (self, channel, line, date, offset=None) :
        """
        Parse a single line, and return the resulting (date, LogLine) tuple, or None, to ignore the line.

        Uses self.TYPE_REGEXES to do the matching.

        Raises LogParseError if no pattern matches or no timestamp could be parsed.
        """

        # empty line
        if not line :
            return

        # look for match
        match = type = None

        # test each type, in table order
        for type, regex in self.TYPE_REGEXES :
            # attempt to match
            match = regex.match(line)

            # found, break
            if match :
                break

        # no match found?
        if not match :
            raise LogParseError(line, offset, "Line did not match any type")

        # match groups
        # NOTE: groupdict(None) contains an entry for *every* named group of the matched
        # pattern (None where nothing captured), so the `'x' in groups` tests below really
        # test which pattern matched, not whether the group captured anything
        groups = match.groupdict(None)

        # parse timestamp
        if 'datetime' in groups :
            # parse datetime using default asctime() format
            dt = datetime.datetime.strptime(groups['datetime'], '%a %b %d %H:%M:%S %Y')

        elif 'timestamp' in groups :
            # parse timestamp into naive datetime (date defaults to 1900/1/1)
            dt = datetime.datetime.strptime(groups['timestamp'], self.timestamp_fmt)

            # override date?
            if date :
                dt = dt.replace(year=date.year, month=date.month, day=date.day)

        elif 'date' in groups :
            # parse date-only datetime
            dt = datetime.datetime.strptime(groups['date'], '%a %b %d %Y')

        else :
            # no timestamp !?
            raise LogParseError(line, offset, "No timestamp")

        # now localize with timezone
        dtz = self.tz.localize(dt)

        # channel, currently unused
        channel_name = (groups.get('channel') or groups.get('channel2'))

        # source: netsplit lines use the server as the source hostname
        if 'server1' in groups :
            source = (None, None, groups.get('server1'), None)

        else :
            source = (groups.get('nickname') or groups.get('nickname2'), groups.get('username'), groups.get('hostname'), groups.get('flags'))

        # target: netsplit lines use the second server as the target
        if 'server2' in groups :
            target = groups.get('server2')

        else :
            target = groups.get('target')

        # data, from whichever payload group the matched pattern defines
        if 'message' in groups :
            data = groups['message']

        elif 'mode' in groups :
            data = groups['mode']

        elif 'topic' in groups :
            data = groups['topic']

        elif 'nick_list' in groups :
            # split into components
            # NOTE(review): 'list' shadows the builtin within this scope
            list = groups['nick_list'].split(', ')

            # additional count? (the "+N more" suffix becomes a trailing '+N' item)
            if 'count' in groups and groups['count'] :
                list.append('+%d' % int(groups['count']))

            # join back into a space-separated string
            data = ' '.join(list)

        else :
            data = None

        # custom types?
        if type == 'DAY_CHANGED' :
            # new date; no LogLine is produced, only the returned date changes
            date = dtz

        else :
            # build+return (date, LogLine)
            return date, LogLine(channel, offset, type, dtz, source, target, data)

    def parse_lines (self, channel, lines, date=None, starting_offset=None) :
        """
        Parse the given lines, yielding LogEvents.

        Offsets are assigned sequentially from starting_offset (when given); the working
        date is threaded through parse_line so DAY_CHANGED lines update it.
        """

        for offset, line in enumerate(lines) :
            # offset?
            if starting_offset :
                offset = starting_offset + offset

            else :
                offset = None

            # try and parse
            try :
                # get None or (date, line)
                line_info = self.parse_line(channel, line, date, offset)

            # passthrough LogParseError's
            except LogParseError :
                raise

            # wrap other errors as LogParseError
            except Exception, e :
                raise LogParseError(line, offset, "Parsing line failed: %s" % e)

            else :
                # nothing?
                if not line_info :
                    continue

                # unpack, update date
                date, line = line_info

                # yield
                yield line
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/qmsk/irclogs/log_search.py Sun Sep 13 01:15:56 2009 +0300
@@ -0,0 +1,367 @@
+"""
+ Full-text searching of logs
+"""
+
+import datetime, calendar, pytz
+import os.path
+
+import HyperEstraier as hype
+
+import log_line, utils, config
+
class LogSearchError (Exception) :
    """
    General search error; base class for the log-search exceptions
    """
+
class SearchIndexError (LogSearchError) :
    """
    Error manipulating the index
    """

    def __init__ (self, msg, db) :
        """
        Build the error from the given message + HyperEstraier.Database
        """

        # translate the database's current error code into its message text
        db_error = db.err_msg(db.error())

        super(SearchIndexError, self).__init__("%s: %s" % (msg, db_error))
+
class NoResultsFound (LogSearchError) :
    """
    No results found
    """
+
class LogSearchIndex (object) :
    """
    An index on the logs for a group of channels.

    This uses Hyper Estraier to handle searching, whereby each log line is a document (yes, I have a powerful server).

    These log documents have the following attributes:
        @uri                - channel/date/line
        channel             - channel code
        type                - the LogType id
        timestamp           - UTC timestamp
        source_nickname     - source nickname
        source_username     - source username
        source_hostname     - source hostname
        source_chanflags    - source channel flags
        target_nickname     - target nickname

    Each document then has a single line of data, which is the log data message
    """

    def __init__ (self, channels, path, mode='r') :
        """
        Open the database at the given path, with the given mode:
            First char:
                r - read, error if not exists
                w - write, create if not exists
                a - write, error if not exists
                c - create, error if exists

            Additional chars:
                trunc - truncate if exists
                + - read as well as write
                ? - non-blocking lock open, i.e. it fails if already open

        Channels is the ChannelList.

        Raises LogSearchError for a create-mode open of an existing index, and
        SearchIndexError if the underlying database open fails.
        """

        # store
        self.channels = channels
        self.path = path
        self.mode = mode

        # check it does not already exist?
        # XXX: fixed from `mode in 'c'`, which tested mode as a *substring* of 'c' and so
        # never triggered for create modes carrying modifier chars (e.g. 'c?')
        if mode[0] == 'c' and os.path.exists(path) :
            raise LogSearchError("Index already exists: %s" % (path, ))

        # mapping of { mode -> flags }
        mode_to_flag = {
            'r': hype.Database.DBREADER,
            'w': hype.Database.DBWRITER | hype.Database.DBCREAT,
            'a': hype.Database.DBWRITER,
            'c': hype.Database.DBWRITER | hype.Database.DBCREAT,
        }

        # flags to use, standard modes
        flags = mode_to_flag[mode[0]]

        # mode-flags
        # XXX: fixed from an if/elif chain, which could only ever apply *one* of these
        # modifiers even though they are documented as independent additional chars
        if '?' in mode :
            # non-blocking locking
            flags |= hype.Database.DBLCKNB

        if '+' in mode :
            # read as well as write
            flags |= hype.Database.DBREADER

        if 'trunc' in mode :
            # truncate. Dangerous!
            flags |= hype.Database.DBTRUNC

        # make instance
        self.db = hype.Database()

        # open
        if not self.db.open(path, flags) :
            raise SearchIndexError("Index open failed: %s, mode=%s, flags=%#06x" % (path, mode, flags), self.db)

    def close (self) :
        """
        Explicitly close the index, this is done automatically on del
        """

        if not self.db.close() :
            raise SearchIndexError("Index close failed", self.db)

    def insert (self, channel, lines) :
        """
        Adds a sequence of LogLines from the given LogChannel to the index, and return the number of added items
        """

        # count from zero
        count = 0

        # iterate
        for line in lines :
            # insert
            self.insert_line(channel, line)

            # count
            count += 1

        # return
        return count

    def insert_line (self, channel, line) :
        """
        Adds a single LogLine for the given LogChannel to the index.

        The line must carry a non-zero offset and a real (non-1900) timestamp.
        """

        # validate the LogChannel
        assert channel.id

        # validate the LogLine
        assert line.offset
        assert line.timestamp

        # create new document
        doc = hype.Document()

        # line date
        date = line.timestamp.date()

        # ensure that it's not 1900 (the strptime default when no date was known)
        assert date.year != 1900

        # add URI
        doc.add_attr('@uri', "%s/%s/%d" % (channel.id, date.strftime('%Y-%m-%d'), line.offset))

        # add channel id
        doc.add_attr('channel', channel.id)

        # add type
        doc.add_attr('type', str(line.type))

        # add UTC timestamp
        doc.add_attr('timestamp', str(utils.to_utc_timestamp(line.timestamp)))

        # add source attribute?
        if line.source :
            source_nickname, source_username, source_hostname, source_chanflags = line.source

            if source_nickname :
                doc.add_attr('source_nickname', source_nickname.encode('utf8'))

            if source_username :
                doc.add_attr('source_username', source_username.encode('utf8'))

            if source_hostname :
                doc.add_attr('source_hostname', source_hostname.encode('utf8'))

            if source_chanflags :
                doc.add_attr('source_chanflags', source_chanflags.encode('utf8'))

        # add target attributes?
        if line.target :
            target_nickname = line.target

            if target_nickname :
                doc.add_attr('target_nickname', target_nickname.encode('utf8'))

        # add data
        if line.data :
            doc.add_text(line.data.encode('utf8'))

        # put, "clean up dispensable regions of the overwritten document"
        if not self.db.put_doc(doc, hype.Database.PDCLEAN) :
            raise SearchIndexError("put_doc", self.db)

    def search_cond (self, cond) :
        """
        Search using a raw hype.Condition, yielding LogLines.

        Raises NoResultsFound if there aren't any results.
        """

        # execute search, unused 'flags' arg stays zero
        results = self.db.search(cond, 0)

        # no results?
        if not results :
            raise NoResultsFound()

        # iterate over the document IDs
        for doc_id in results :
            # load document, this throws an exception...
            # option constants are hype.Database.GDNOATTR/GDNOTEXT
            doc = self.db.get_doc(doc_id, 0)

            # load the attributes/text
            channel = self.channels.lookup(doc.attr('channel'))
            type = int(doc.attr('type'))
            timestamp = utils.from_utc_timestamp(int(doc.attr('timestamp')))

            # source
            source = (doc.attr('source_nickname'), doc.attr('source_username'), doc.attr('source_hostname'), doc.attr('source_chanflags'))

            # target
            target = doc.attr('target_nickname')

            # message text
            message = doc.cat_texts().decode('utf8')

            # build+yield to as LogLine (offset is not stored, so it stays None)
            yield log_line.LogLine(channel, None, type, timestamp, source, target, message)

    def search (self, options=None, channel=None, attrs=None, phrase=None, order=None, max=None, skip=None) :
        """
        Search with flexible parameters

            options - bitmask of hype.Condition.*
            channel - LogChannel object
            attrs - raw attribute expressions
            phrase - the search query phrase
            order - order attribute expression
            max - number of results to return
            skip - number of results to skip
        """

        # build condition
        cond = hype.Condition()

        if options :
            # set options
            cond.set_options(options)

        if channel :
            # add channel attribute
            cond.add_attr(("channel STREQ %s" % channel.id).encode('utf8'))

        if attrs :
            # add attributes
            for attr in attrs :
                cond.add_attr(attr.encode('utf8'))

        if phrase :
            # add phrase
            cond.set_phrase(phrase.encode('utf8'))

        if order :
            # set order
            cond.set_order(order)

        if max :
            # set max
            cond.set_max(max)

        if skip :
            # set skip
            cond.set_skip(skip)

        # execute
        return self.search_cond(cond)

    def search_simple (self, channel, query, count=None, offset=None, search_msg=True, search_nick=False) :
        """
        Search for lines from the given channel for the given simple query.

        The search_* params define which attributes to search for (using fulltext search for the message, STROR for
        attributes).

        Results are returned in ascending time order.
        """

        # search attributes
        attrs = []

        # nickname target query
        if search_nick :
            attrs.append("source_nickname STRINC %s" % query)
#            attrs.append("target_nickname STRINC %s" % query)

        # use search(), backwards
        results = list(self.search(
            # simplified phrase
            options = hype.Condition.SIMPLE,

            # specific channel
            channel = channel,

            # given phrase
            phrase = query if search_msg else None,

            # attributes defined above
            attrs = attrs,

            # order by timestamp, descending (backwards)
            order = "timestamp NUMD",

            # count/offset
            max = count,
            skip = offset,
        ))

        # reverse back into ascending time order
        return reversed(results)

    def list (self, channel, date, count=None, skip=None) :
        """
        List all indexed log items for the given UTC date
        """

        # start/end dates
        # NOTE(review): end-of-day uses hour=23, minute=23 — presumably meant 23:59; confirm
        dt_start = datetime.datetime(date.year, date.month, date.day, 0, 0, 0, 0)
        dt_end = datetime.datetime(date.year, date.month, date.day, 23, 23, 59, 999999)

        # search
        return self.search(
            # specific channel
            channel = channel,

            # specific date range
            attrs = [
                "timestamp NUMBT %d %d" % (utils.to_utc_timestamp(dt_start), utils.to_utc_timestamp(dt_end))
            ],

            # order correctly
            order = "timestamp NUMA",

            # max count/offset
            max = count,
            skip = skip
        )
+
def get_index () :
    """
    Returns the default read-only index, suitable for searching
    """

    # XXX: no caching, just open it every time
    return LogSearchIndex(config.LOG_CHANNELS, config.SEARCH_INDEX_PATH, 'r')
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/qmsk/irclogs/log_source.py Sun Sep 13 01:15:56 2009 +0300
@@ -0,0 +1,679 @@
+"""
+ A source of IRC log files
+"""
+
+import datetime, calendar, itertools, functools, math
+import os, os.path, errno
+import pytz
+
+import config, utils
+
+# a timedelta that represents one day
+ONE_DAY = datetime.timedelta(days=1)
+
+class LogSourceDecoder (object) :
+ """
+ Handles decoding of LogSource lines
+ """
+
+ def __init__ (self, encoding_list) :
+ """
+ Will try each of the given (charset, errors) items in turn, until one succeeds
+ """
+
+ self.encoding_list = encoding_list
+
+ def decode (self, line) :
+ """
+ Decode the line of str() text into an unicode object
+ """
+
+ # list of errors encountered
+ error_list = []
+
+ # try each in turn
+ for charset, errors in self.encoding_list :
+ # trap UnicodeDecodeError to try with the next one
+ try :
+ return line.decode(charset, errors)
+
+ except UnicodeDecodeError, e :
+ error_list.append("%s:%s - %s" % (charset, errors, e))
+ continue
+
+ # failure
+ raise UnicodeDecodeError("Failed to decode line: %r: %s" % (line, ', '.join(error_list)))
+
class LogSource (object) :
    """
    A collection of IRC logs for a specific target in some format. Provides the possibility to read specific events
    """

    def __init__ (self, decoder, channel=None) :
        """
        The appropriate LogChannel must be given, as we need to be able to construct the LogLines. If it is not yet
        known, then it can be given as None, and set later with bind_channel.

        Uses the given LogSourceDecoder to decode the lines.
        """

        self.channel = channel
        self.decoder = decoder

    def bind_channel (self, channel) :
        """
        Set this source's channel, where None was set before
        """

        # must not already be bound to a channel
        assert not self.channel

        self.channel = channel

    def get_latest (self, count) :
        """
        Yield the latest events, up to `count` of them.
        """

        # abstract method; referencing this undefined name raises NameError if called
        abstract

    def get_date (self, dt) :
        """
        Get logs for the given date (as a datetime).
        """

        abstract

    def get_date_paged (self, dt, count, page=None) :
        """
        Get the logs for a given date (as a datetime), divided into pages of count each. If page is given, the time
        portion of the dt is ignored, and the lines for the given page are returned. Otherwise, if page is None,
        then the lines for the page containing the given timestamp is returned.

        The return value is a (page, max, lines) tuple.
        """

        # how to act?
        if page :
            # known page: skip a constant number of leading lines
            skip = (page - 1) * count

        else :
            skip = None

        # number of the page currently being filled
        this_page = 1

        # last line's timestamp, for bracketing dt between consecutive lines
        last_ts = None

        # found the page containing dt yet?
        found = False

        # count the full number of lines, for max_pages
        line_count = 0

        # collect lines for the current page
        lines = []

        # iterate using get_date
        for line in self.get_date(dt) :
            # count every line, including skipped ones
            line_count += 1

            # skip lines belonging to pages before the requested one?
            if skip :
                skip -= 1
                continue

            # is this page all that we want/need?
            if page or found :
                # already full? keep looping only to finish line_count
                if len(lines) >= count :
                    continue

            # still searching for the page containing the given timestamp
            else :
                # didn't find it in this page?
                if len(lines) >= count :
                    # reset to next page
                    lines = []
                    this_page += 1

                # is dt between these two timestamps?
                if (not last_ts or last_ts <= dt) and (dt <= line.timestamp) :
                    # found!
                    found = True
                    page = this_page

                else :
                    # keep looking
                    last_ts = line.timestamp

            # store line
            lines.append(line)

        # calculate max_pages
        # NOTE(review): if page was None and dt was never bracketed, page is returned as None
        max_pages = math.ceil(float(line_count) / count)

        # return
        return (page, max_pages, lines)

    def get_month_days (self, dt) :
        """
        Return an ordered sequence of dates, telling which days in the given month (as a datetime) have logs available.
        """

        abstract

    def get_modified (self, dt=None, after=None, until=None) :
        """
        Returns a sequence of LogLines that may have been *modified* from their old values since the given datetime.

        If the datetime is not given, *all* lines are returned.

        If after is given, only lines from said date onwards will be returned, regardless of modification.
        If until is given, only lines up to and including said date will be returned, regardless of modification.

        The LogLines should be in time order.
        """

        abstract

    def get_prev_date (self, dt) :
        """
        Get the next distinct date of logs available preceeding the given date, or None
        """

        abstract

    def get_next_date (self, dt) :
        """
        Get the next distinct date of logs following the given date, or None.
        """

        abstract
+
class LogFile (object) :
    """
    A file containing LogEvents

    XXX: modify to implement LogSource?
    """

    def __init__ (self, path, parser, decoder, channel=None, start_date=None, sep='\n') :
        """
        Open the file at the given path, which contains lines as separated by the given separator. Lines are
        decoded using the given LogSourceDecoder, and then parsed using the given parser, using the given date
        as the initial date for this log's first line.

        XXX: currently we assume start_date also for the end of the file
        """

        # store
        self.channel = channel
        self.path = path
        self.parser = parser
        self.start_date = start_date
        self.decoder = decoder
        self.sep = sep

        # open in binary mode; kept open for this object's lifetime
        self.file = open(path, 'rb')

    def __iter__ (self) :
        """
        Yields a series of unicode lines, as read from the top of the file
        """

        # seek to beginning
        self.file.seek(0)

        # iterate over lines, decoding them as well
        return (self.decoder.decode(line.rstrip(self.sep)) for line in self.file)

    def read_full (self) :
        """
        Reads all LogLines. The LogLines will have a valid offset (starting from 1).
        """

        # just use our __iter__
        return self.parser.parse_lines(self.channel, self, self.start_date, starting_offset=1)

    def read_from (self, dt) :
        """
        Reads all LogLines from the given naive timestamp onwards, inclusive
        """

        # start reading at beginning
        events = self.read_full()

        # skip unwanted events
        for event in events :
            if event.timestamp < dt :
                continue

            else :
                # include this line as well
                yield event
                break

        # yield the rest as-is
        for event in events :
            yield event

    def read_until (self, dt) :
        """
        Reads all LogLines up until the given naive timestamp, inclusive
        """

        # start reading events at the beginning
        events = self.read_full()

        # yield events until we hit the given timestamp
        for event in events :
            if event.timestamp <= dt :
                yield event

            else :
                break

        # ignore the rest
        return

    def _read_blocks_reverse (self, blocksize=1024) :
        """
        Yields blocks of file data in reverse order, starting at the end of the file
        """

        # seek to end of file
        self.file.seek(0, os.SEEK_END)

        # read offset
        # XXX: hack -1 to get rid of trailing newline; note this also drops the last
        # char of a file that does *not* end with a separator
        size = offset = self.file.tell() - 1

        # do not try to read past the beginning of the file
        while offset > 0:
            # calc new offset + size
            if offset > blocksize :
                # full block
                offset -= blocksize
                read_size = blocksize

            else :
                # partial block at the start of the file
                read_size = offset
                offset = 0

            # seek to offset
            self.file.seek(offset)

            # read the data we want
            block = self.file.read(read_size)

            # sanity check
            assert len(block) == read_size

            # yield
            yield block

    def _read_lines_reverse (self) :
        """
        Yields decoded lines from the end of the file, in reverse order.
        """

        # partial line left over from the previous (later-in-file) block
        buf = ''

        # read from end of file, a block at a time
        for block in self._read_blocks_reverse() :
            # add in our previous buf
            buf = block + buf

            # split up lines
            lines = buf.split(self.sep)

            # keep the first one as our buffer, as it's incomplete
            buf = lines[0]

            # yield the rest a line at a time in reverse order... this looks weird, but that's how slicing works :)
            # XXX: use something like islice, this has to build a slice object
            for line in lines[:0:-1] :
                yield self.decoder.decode(line)

        # XXX: fixed — once every block has been consumed, the remaining buffer holds the
        # *first* line of the file, complete; it used to be silently dropped, so read_latest
        # could never return the file's first line
        if buf :
            yield self.decoder.decode(buf)

    def read_latest (self, count) :
        """
        Returns up to count events, from the end of the file, or less, if the file doesn't contain that many lines.
        """

        # the list of lines
        lines = []

        # start reading lines into lines
        for line in self._read_lines_reverse() :
            # append
            lines.append(line)

            # done?
            if len(lines) >= count :
                break

        # decode in reverse order, using our starting date....
        # XXX: use lines[::-1] or reversed?
        # XXX: it may make more sense to parse in reverse order, using 'self.end_date' or something like that
        return self.parser.parse_lines(self.channel, reversed(lines), self.start_date)
+
+class LogDirectory (LogSource) :
+    """
+        A directory containing a series of timestamped LogFiles
+
+        Filenames are derived from dates via strftime(filename_fmt), and parsed back via strptime, both in the
+        directory's configured timezone.
+    """
+
+    def __init__ (self, path, tz, parser, decoder, filename_fmt, channel=None) :
+        """
+            Load the logfiles at the given path, which are for the given LogChannel
+
+            Decode the file lines using the given decoder, the files are named according to the date in the given
+            timezone and date format, and will be parsed using the given parser.
+
+            path            - filesystem path of the directory containing the logfiles
+            tz              - pytz timezone used for filename dates and day boundaries
+            parser          - parser used to turn raw lines into events
+            decoder         - charset decoder applied to raw lines
+            filename_fmt    - strftime/strptime format mapping a date to a filename
+            channel         - the LogChannel these logs are for, if known
+        """
+
+        # store
+        self.channel = channel
+        self.path = path
+        self.tz = tz
+        self.parser = parser
+        self.decoder = decoder
+        self.filename_fmt = filename_fmt
+
+    def _get_logfile_date (self, d, load=True, mtime=False, ignore_missing=False) :
+        """
+            Get the logfile corresponding to the given naive date in our timezone.
+
+            If load is False, only test for the presence of the logfile, do not actually open it. If mtime is given,
+            then this returns the file's mtime
+
+            Returns None for a missing logfile when ignore_missing is given as True; otherwise the error propagates.
+        """
+
+        # format filename
+        filename = d.strftime(self.filename_fmt)
+
+        # build path
+        path = os.path.join(self.path, filename)
+
+        try :
+            if load :
+                # open+return the LogFile
+                return LogFile(path, self.parser, self.decoder, start_date=d, channel=self.channel)
+
+            elif mtime :
+                # stat
+                # NOTE(review): utils.mtime raises os.error for a missing file, which the IOError handler below
+                # does not catch under Python 2 — ignore_missing is ineffective on this path; confirm
+                return utils.mtime(path)
+
+            else :
+                # test (os.path.exists never raises for a missing file)
+                return os.path.exists(path)
+
+        # XXX: move to LogFile
+        except IOError, e :
+            # return None for missing files
+            if e.errno == errno.ENOENT and ignore_missing :
+                return None
+
+            else :
+                raise
+
+    def _iter_logfile_dates (self, after=None, until=None, reverse=False) :
+        """
+            Yields a series of naive datetime objects representing the logfiles that are available, in time order.
+
+            Parameters :
+                after       only dates from said date onwards will be returned
+                until       only dates up to and including said date will be returned
+                reverse     the dates are returned in reverse order instead. Note that the meaning of after/until doesn't change
+        """
+
+        # convert timestamps to our timezone's dates
+        if after :
+            after = after.astimezone(self.tz).date()
+
+        if until :
+            until = until.astimezone(self.tz).date()
+
+        # listdir
+        filenames = os.listdir(self.path)
+
+        # sort (lexicographic order; assumes filename_fmt sorts chronologically — TODO confirm)
+        filenames.sort(reverse=reverse)
+
+        # iter files
+        for filename in filenames :
+            try :
+                # parse date
+                dt = self.tz.localize(datetime.datetime.strptime(filename, self.filename_fmt))
+                date = dt.date()
+
+            # XXX: bare except also swallows KeyboardInterrupt/SystemExit; a ValueError catch would suffice
+            except :
+                # ignore files whose names don't match filename_fmt
+                continue
+
+            else :
+                if (after and date < after) or (until and date > until) :
+                    # ignore
+                    continue
+
+                else :
+                    # yield
+                    yield dt
+
+    def _iter_date_reverse (self, dt=None) :
+        """
+            Yields an infinite series of naive date objects in our timezone, iterating backwards in time starting at the
+            given *datetime*, or the the current date, if none given
+        """
+
+        # default to now
+        if not dt :
+            dtz = self.tz.localize(datetime.datetime.now())
+
+        else :
+            # convert to target timezone
+            dtz = dt.astimezone(self.tz)
+
+        # iterate unto infinity
+        while True :
+            # yield
+            yield dtz.date()
+
+            # one day sdrawkcab
+            # NOTE(review): plain timedelta arithmetic on a localized datetime; only the .date() is used, so
+            # DST offsets shouldn't matter here — confirm
+            dtz -= ONE_DAY
+
+    def _iter_logfile_reverse (self, dt=None, max_files=100) :
+        """
+            Yields a series of LogFile objects, iterating backwards in time starting at the given datetime, or the
+            current date, if none given.
+
+            Reads/probes at most max_files files.
+
+            Raises an Exception if no logfile at all was found within the first max_files days.
+        """
+
+        # start counting at zero...
+        file_count = 0
+
+        # have we found any files at all so far?
+        have_found = False
+
+        # iterate backwards over days
+        for day in self._iter_date_reverse(dt) :
+            # stop if we've handled enough files by now
+            if file_count > max_files :
+                break
+
+            # try and open the next logfile
+            logfile = None
+
+            file_count += 1
+            logfile = self._get_logfile_date(day, ignore_missing=True)
+
+            # no logfile there?
+            if not logfile :
+                # hit our limit?
+                if file_count > max_files :
+                    # if we didn't find any logfiles at all, terminate rudely
+                    if not have_found :
+                        raise Exception("No recent logfiles found")
+
+                    else :
+                        # stop looking, deal with what we've got
+                        return
+
+                else :
+                    # skip to next day
+                    continue
+
+            # mark have_found
+            have_found = True
+
+            # yield it
+            yield logfile
+
+    def get_latest (self, count) :
+        """
+            Uses _iter_logfile_reverse to read and yield the given number of lines from as many logfiles as needed
+        """
+
+        # read the events into here
+        lines = []
+
+        # start reading in those logfiles
+        for logfile in self._iter_logfile_reverse() :
+            # read the events, prepending the older file's lines so the result stays in forward time order
+            # XXX: use a queue
+            lines = list(logfile.read_latest(count)) + lines
+
+            # done?
+            if len(lines) >= count :
+                break
+
+        # return the events (may be fewer than count, or slightly more — no truncation here)
+        return lines
+
+    def get_date (self, dt) :
+        """
+            A 'day' is considered to be a 24-hour period from 00:00:00 to 23:59:59. If the timezone of the given
+            datetime differs from our native datetime, this may involve lines from more than one logfile.
+        """
+
+        # begin/end of 24h period, in target timezone
+        # NOTE(review): dtz_begin does not zero out microseconds — presumably harmless; confirm
+        dtz_begin = dt.replace(hour=0, minute=0, second=0).astimezone(self.tz)
+        dtz_end = dt.replace(hour=23, minute=59, second=59, microsecond=999999).astimezone(self.tz)
+
+        # as dates
+        d_begin = dtz_begin.date()
+        d_end = dtz_end.date()
+
+#        print
+#        print "LogDirectory.get_date - %s" % dt
+#        print "\t        %s %s" % (d_begin, dtz_begin)
+#        print "\t-> %s %s" % (d_end, dtz_end)
+
+        # if they're the same, just pull the full log for that date
+        if d_begin == d_end :
+            # open that log
+            logfile = self._get_logfile_date(d_begin)
+
+            # return the full data
+            return logfile.read_full()
+
+        # otherwise, we need to pull two partial logs
+        else :
+            # open both of them, but it's okay if we don't have the second one
+            f_begin = self._get_logfile_date(d_begin)
+            f_end = self._get_logfile_date(d_end, ignore_missing=True)
+
+            # chain together the two sources
+            return itertools.chain(
+                f_begin.read_from(dtz_begin),
+                f_end.read_until(dtz_end) if f_end else []
+            )
+
+    def _iter_month_days (self, month) :
+        """
+            Iterates over the days of a month as dt objects with time=0, localized to the month's tzinfo
+        """
+
+        # there's at most 31 days in a month...
+        for day in xrange(1, 32) :
+            try :
+                # try and build the datetime
+                dt = datetime.datetime(month.year, month.month, day)
+
+            # XXX: bare except — only ValueError (day out of range for month) is expected here
+            except :
+                # stop
+                return
+
+            else :
+                # fix timezones + yield
+                yield month.tzinfo.localize(dt)
+
+    def get_month_days (self, month) :
+        """
+            Yields the dates in the given datetime's month for which logfiles are available
+        """
+
+        # iterate over month's days
+        for dt in self._iter_month_days(month) :
+            # date in our target timezone
+            log_date = dt.astimezone(self.tz).date()
+
+            # test for it
+            if self._get_logfile_date(log_date, load=False, ignore_missing=True) :
+                # valid
+                yield dt.date()
+
+    def get_modified (self, dt=None, after=None, until=None) :
+        """
+            Returns the contents of all logfiles with mtimes past the given date
+        """
+
+        # iterate through all available logfiles in date order, as datetimes, from the given date on
+        for log_date in self._iter_logfile_dates(after, until) :
+            # compare against dt?
+            if dt :
+                # stat
+                mtime = self._get_logfile_date(log_date, load=False, mtime=True, ignore_missing=True)
+
+                # not modified?
+                # NOTE(review): if mtime were None, Python 2 orders None before any datetime, so the file would
+                # be skipped — relies on py2 mixed-type comparison; confirm intent
+                if mtime < dt :
+                    # skip
+                    continue
+
+            # open
+            logfile = self._get_logfile_date(log_date)
+
+            # yield all lines
+            for line in logfile.read_full() :
+                yield line
+
+    def get_prev_date (self, dt) :
+        """
+            Just use _iter_logfile_dates
+        """
+
+        # use for to "iter" once; the for/else fires only when the iterator is empty (no matching logfile)
+        for log_date in self._iter_logfile_dates(until=dt - ONE_DAY, reverse=True) :
+            return log_date
+
+        else :
+            return None
+
+    def get_next_date (self, dt) :
+        """
+            Just use _iter_logfile_dates
+        """
+
+        # use for to "iter" once; the for/else fires only when the iterator is empty (no matching logfile)
+        for log_date in self._iter_logfile_dates(after=dt + ONE_DAY) :
+            return log_date
+
+        else :
+            return None
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/qmsk/irclogs/preferences.py Sun Sep 13 01:15:56 2009 +0300
@@ -0,0 +1,534 @@
+"""
+ Handling user preferences
+"""
+
+import functools
+import Cookie
+
+from qmsk.web import urltree
+import utils
+
+class Preference (urltree.URLType) :
+    """
+        A specific preference
+
+        Subclasses supply the parse()/build() methods of the urltree.URLType interface, set the name/default
+        class attributes below, and may override is_default()/process().
+    """
+
+    # the name to use (also the cookie / query-parameter key)
+    name = None
+
+    # the default value, as from parse()
+    default = None
+
+    def is_default (self, value) :
+        """
+            Returns True if the given post-value is the default value for this preference.
+
+            Defaults to just compare against self.default
+        """
+
+        return (value == self.default)
+
+    def process (self, preferences, value) :
+        """
+            Post-process this preference value. This can access the post-processed values of all other preferences that
+            were defined before this one in the list given to Preferences.
+
+            preferences     - the RequestPreferences object being built
+            value           - the parsed (pre-process) value
+
+            Defaults to just return value.
+        """
+
+        return value
+
+class RequestPreferences (object) :
+    """
+        Represents the specific preferences for some request
+    """
+
+    def __init__ (self, preferences, request, value_map=None) :
+        """
+            Initialize with the given Preferences object, http Request, and { key: value } mapping of raw preference values.
+
+            This will build a mapping of { name: pre-value } using Preference.parse/Preference.default, and then
+            post-process them into the final { name: value } mapping using Preference.process, in strict pref_list
+            order. Note that the process() method will only have access to those preferences processed before it was.
+        """
+
+        # store
+        self.preferences = preferences
+        self.request = request
+
+        # initialize
+        # values:       { name: post-processed value }
+        # set_cookies:  { name: raw cookie string to set, or None to unset } — filled in by set()
+        self.values = {}
+        self.set_cookies = {}
+
+        # initial value map of parsed-but-unprocessed values
+        pre_values = {}
+
+        # load preferences
+        for pref in preferences.pref_list :
+            # got a value for it?
+            if value_map and pref.name in value_map :
+                # get value
+                value = value_map[pref.name]
+
+                # parse it
+                value = pref.parse(value)
+
+            else :
+                # use default value
+                value = pref.default
+
+            # add
+            pre_values[pref.name] = value
+
+        # then post-process using Preferences.process(), in strict pref_list order
+        for pref in preferences.pref_list :
+            # store into self.values, so that pref.get(...) will be able to access the still-incomplete self.values
+            # dict
+            self.values[pref.name] = pref.process(self, pre_values[pref.name])
+
+    def _get_name (self, pref) :
+        """
+            Look up a Preference's name, either by class, object or name.
+        """
+
+        # Preference -> name
+        if isinstance(pref, Preference) :
+            pref = pref.name
+
+        return pref
+
+    def pref (self, name) :
+        """
+            Look up a Preference by object, name
+        """
+
+        # Preference
+        if isinstance(name, Preference) :
+            return name
+
+        # Preference.name
+        elif isinstance(name, basestring) :
+            return self.preferences.pref_map[name]
+
+        # XXX: class? — looking a Preference up by its class is not supported yet
+        else :
+            assert False
+
+    def get (self, pref) :
+        """
+            Return the value for the given Preference, or preference name
+        """
+
+        # look up
+        return self.values[self._get_name(pref)]
+
+    # support dict-access (prefs[pref] / prefs['name'])
+    __getitem__ = get
+
+    def is_default (self, pref) :
+        """
+            Returns True if the given preference is at its default value
+        """
+
+        # determine using Preference.is_default
+        return self.pref(pref).is_default(self.get(pref))
+
+    def build (self, pref) :
+        """
+            Like 'get', but return the raw cookie value
+        """
+
+        # the Preference
+        pref = self.pref(pref)
+
+        # build
+        return pref.build(self.get(pref))
+
+    def parse (self, pref, value=None) :
+        """
+            Parse+process the raw value for some pref into a value object.
+
+            If the given raw value is None, this uses Preference.default
+        """
+
+        # lookup pref
+        pref = self.pref(pref)
+
+        # build value
+        if value is not None :
+            # parse
+            value = pref.parse(value)
+
+        else :
+            # default
+            value = pref.default
+
+        # post-process
+        value = pref.process(self, value)
+
+        # return
+        return value
+
+    def set (self, name, value_obj=None) :
+        """
+            Set a new value for the given preference (by str name).
+
+            If value_obj is None, then the preference cookie is unset
+        """
+
+        # sanity-check to make sure we're not setting it twice...
+        assert name not in self.set_cookies
+
+        # None?
+        if value_obj is not None :
+            # encode using the Preference object
+            value_str = self.preferences.pref_map[name].build(value_obj)
+
+        else :
+            # unset as None
+            value_str = None
+
+        # update in our dict
+        self.values[name] = value_obj
+
+        # add to set_cookies; Preferences.handler turns these into Set-cookie headers on the response
+        self.set_cookies[name] = value_str
+
+class Preferences (object) :
+    """
+        Handle user preferences using cookies
+    """
+
+    def __init__ (self, pref_list) :
+        """
+            Use the given list of Preference objects.
+
+            The ordering of the given pref_list is significant for the process() implementation, as the
+            Preferences are process()'d in order.
+        """
+
+        # store
+        self.pref_list = pref_list
+
+        # translate to mapping as well, for lookup by name
+        self.pref_map = dict((pref.name, pref) for pref in pref_list)
+
+    def load (self, request, ) :
+        """
+            Load the set of preferences for the given request.
+
+            Returns a (cookies, RequestPreferences) tuple, where cookies is the parsed Cookie.SimpleCookie, or
+            None if the request carried no cookies. Query parameters with a preference's name override the
+            cookie value.
+        """
+
+        # the dict of raw { name: value } strings
+        values = {}
+
+        # load the cookies
+        cookie_data = request.env.get('HTTP_COOKIE')
+
+        # got any?
+        if cookie_data :
+            # parse into a SimpleCookie
+            cookies = Cookie.SimpleCookie(cookie_data)
+
+            # update the the values
+            values.update((morsel.key, morsel.value) for morsel in cookies.itervalues())
+
+        else :
+            cookies = None
+
+        # apply any query parameters
+        for pref in self.pref_list :
+            # look for a query param
+            value = request.get_arg(pref.name)
+
+            if value :
+                # override
+                values[pref.name] = value
+
+        # build the RequestPreferences object
+        return cookies, RequestPreferences(self, request, values)
+
+    def handler (self, *pref_list) :
+        """
+            Intended to be used as a decorator for a request handler, this will load the given preferences and pass
+            them to the wrapped handler as keyword arguments, in addition to any others given.
+        """
+
+        def _decorator (func) :
+            @functools.wraps(func)
+            def _handler (request, **args) :
+                # load preferences
+                cookies, prefs = self.load(request)
+
+                # bind to request.prefs
+                # XXX: better way to do this? :/
+                request.prefs = prefs
+
+                # update args with new ones
+                args.update(((pref.name, prefs.get(pref)) for pref in pref_list))
+
+                # handle to get response
+                response = func(request, **args)
+
+                # set cookies?
+                if prefs.set_cookies :
+                    # default, empty, cookiejar
+                    if not cookies :
+                        cookies = Cookie.SimpleCookie('')
+
+                    # update cookies
+                    for key, value in prefs.set_cookies.iteritems() :
+                        if value is None :
+                            # unsetting (expiring) a cookie is not supported
+                            assert False, "Not implemented yet..."
+
+                        else :
+                            # set
+                            # NOTE(review): 'config' is the module-level import further down this file,
+                            # resolved at call time — confirm nothing imports this module partially
+                            cookies[key] = value
+                            cookies[key]["path"] = config.PREF_COOKIE_PATH
+                            cookies[key]["expires"] = config.PREF_COOKIE_EXPIRE_SECONDS
+
+                    # add headers — note this re-sends *all* cookies, not just the changed ones
+                    for morsel in cookies.itervalues() :
+                        response.add_header('Set-cookie', morsel.OutputString())
+
+                return response
+
+            # return wrapped handler
+            return _handler
+
+        # return decorator...
+        return _decorator
+
+# now for our defined preferences....
+import pytz
+import config
+
+class TimeFormat (urltree.URLStringType, Preference) :
+    """
+        Time format (a strftime-style format string; parse/build inherited from urltree.URLStringType)
+    """
+
+    # set name
+    name = 'time_format'
+
+    # default value
+    default = config.PREF_TIME_FMT_DEFAULT
+
+class DateFormat (urltree.URLStringType, Preference) :
+    """
+        Date format (a strftime-style format string; parse/build inherited from urltree.URLStringType)
+    """
+
+    # set name
+    name = 'date_format'
+
+    # default value
+    default = config.PREF_DATE_FMT_DEFAULT
+
+class TimezoneOffset (Preference) :
+    """
+        If the DST-aware 'timezone' is missing, we can fallback to a fixed-offset timezone as detected by
+        Javascript.
+
+        This is read-only, and None by default
+    """
+
+    name = 'timezone_offset'
+    default = None
+
+    def parse (self, offset) :
+        """
+            Offset in minutes -> said minutes, as an int
+
+            Raises ValueError if the raw value is not an integer string.
+        """
+
+        return int(offset)
+
+class Timezone (Preference) :
+    """
+        Timezone
+    """
+
+    # set name
+    name = 'timezone'
+
+    # default is handled via process()
+    default = 'auto'
+
+    # the list of available (value, name) options for use with helpers.select_options
+    # NOTE(review): the zone options use None as their value — presumably the helper falls back to the
+    # name in that case; confirm against helpers.select_options
+    OPTIONS = [('auto', "Autodetect")] + [(None, tz_name) for tz_name in pytz.common_timezones]
+
+    def parse (self, name) :
+        """
+            default -> default
+            tz_name -> pytz.timezone
+        """
+
+        # special-case for 'auto'
+        if name == self.default :
+            return self.default
+
+        else :
+            return pytz.timezone(name)
+
+    def is_default (self, tz) :
+        """
+            True if it's a FixedOffsetTimezone or PREF_TIMEZONE_FALLBACK
+        """
+
+        return (isinstance(tz, utils.FixedOffsetTimezone) or tz == config.PREF_TIMEZONE_FALLBACK)
+
+    def build (self, tz) :
+        """
+            FixedOffsetTimezone -> None
+            pytz.timezone -> tz_name
+        """
+
+        # special-case for auto/no explicit timezone
+        if self.is_default(tz) :
+            # returns the 'auto' marker, not None, despite the docstring above
+            return self.default
+
+        else :
+            # pytz.timezone zone name
+            return tz.zone
+
+    def process (self, prefs, tz) :
+        """
+            If this timezone is given, simply build that. Otherwise, try and use TimezoneOffset, and if that fails,
+            just return the default.
+
+            None -> FixedOffsetTimezone/PREF_TIMEZONE_FALLBACK
+            pytz.timezone -> pytz.timezone
+        """
+
+        # specific timezone set?
+        if tz != self.default :
+            return tz
+
+        # fixed offset?
+        # 'timezone_offset' here is the module-level TimezoneOffset instance defined at the bottom of this file;
+        # it is listed before 'timezone' in the Preferences pref_list, so its value is already processed
+        elif prefs[timezone_offset] is not None :
+            return utils.FixedOffsetTimezone(prefs[timezone_offset])
+
+        # default
+        else :
+            return config.PREF_TIMEZONE_FALLBACK
+
+class ImageFont (Preference) :
+    """
+        Font for ImageFormatter
+    """
+
+    # set name
+    name = 'image_font'
+
+    def __init__ (self, font_dict, default_name) :
+        """
+            Use the given { name: (path, title) } dict and default the given name
+        """
+
+        self.font_dict = font_dict
+
+        # default is the fully-parsed (name, path, title) tuple, not the raw name
+        self.default = self.parse(default_name)
+
+    def parse (self, name) :
+        """
+            name -> (name, path, title)
+
+            Raises KeyError for an unknown font name.
+        """
+
+        path, title = self.font_dict[name]
+
+        return name, path, title
+
+    def build (self, font_info) :
+        """
+            (name, path, title) -> name
+        """
+
+        name, path, title = font_info
+
+        return name
+
+class ImageFontSize (urltree.URLIntegerType, Preference) :
+    """
+        Font size for ImageFormatter (integer; parse/build inherited from urltree.URLIntegerType)
+    """
+
+    # set name, default
+    name = 'image_font_size'
+    default = config.PREF_IMAGE_FONT_SIZE_DEFAULT
+
+    # XXX: constraints for valid values
+
+class Formatter (Preference) :
+    """
+        LogFormatter to use
+    """
+
+    # set name
+    name = 'formatter'
+
+    def __init__ (self, formatters, default) :
+        """
+            Use the given { name -> class LogFormatter } dict and default (a LogFormatter class)
+        """
+
+        self.formatters = formatters
+        self.default = default
+
+    def parse (self, fmt_name) :
+        """
+            fmt_name -> class LogFormatter
+
+            Raises KeyError for an unknown formatter name.
+        """
+
+        return self.formatters[fmt_name]
+
+    def build (self, fmt_cls) :
+        """
+            class LogFormatter -> fmt_name
+        """
+
+        return fmt_cls.name
+
+    def process (self, prefs, fmt_cls) :
+        """
+            class LogFormatter -> LogFormatter(tz, time_fmt, image_font.path)
+
+            Instantiates the formatter class using the already-processed timezone/time_format/image_font*
+            preferences — all of which precede 'formatter' in the Preferences pref_list.
+        """
+
+        # time stuff
+        tz = prefs[timezone]
+        time_fmt = prefs[time_format]
+
+        # font stuff
+        font_name, font_path, font_title = prefs[image_font]
+        font_size = prefs[image_font_size]
+
+        return fmt_cls(tz, time_fmt, font_path, font_size)
+
+class Count (urltree.URLIntegerType, Preference) :
+    """
+        Number of lines of log data to display per page
+    """
+
+    # set name
+    name = "count"
+
+    # default
+    default = config.PREF_COUNT_DEFAULT
+
+    def __init__ (self) :
+        # constrain to a positive integer, capped at the configured maximum
+        super(Count, self).__init__(allow_negative=False, allow_zero=False, max=config.PREF_COUNT_MAX)
+
+# and then build the Preferences object
+# These module-level singletons are referenced by name from Timezone.process/Formatter.process above.
+time_format = TimeFormat()
+date_format = DateFormat()
+timezone_offset = TimezoneOffset()
+timezone = Timezone()
+image_font = ImageFont(config.FORMATTER_IMAGE_FONTS, config.PREF_IMAGE_FONT_DEFAULT)
+image_font_size = ImageFontSize()
+formatter = Formatter(config.LOG_FORMATTERS, config.PREF_FORMATTER_DEFAULT)
+count = Count()
+
+# list order matters: process() for each preference may only read preferences listed before it
+# (timezone needs timezone_offset; formatter needs timezone/time_format/image_font/image_font_size)
+preferences = Preferences([
+    time_format,
+    date_format,
+    timezone_offset,
+    timezone,
+    image_font,
+    image_font_size,
+    formatter,
+    count,
+])
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/qmsk/irclogs/urls.py Sun Sep 13 01:15:56 2009 +0300
@@ -0,0 +1,46 @@
+
+"""
+ URL mapping for the irclogs.qmsk.net site
+"""
+
+# urltree stuff
+from qmsk.web import urltree
+
+# our own handlers
+import handlers
+
+# for types
+import utils
+
+# for configuration
+import config
+
+# our URLTypes
+types = dict(
+ # LogChannel
+ cid = utils.URLChannelName(config.LOG_CHANNELS.dict()),
+
+ # datetime
+ date = utils.URLDateType(config.URL_DATE_FMT),
+
+ # UTC timestamp
+ ts = utils.URLTimestampType(),
+)
+
+# our URLConfig
+urls = url = urltree.URLConfig(type_dict=types)
+
+# urls
+index = url('/', handlers.index )
+preferences = url('/preferences', handlers.preferences_ )
+channel_select = url('/channel_select/?channel:cid', handlers.channel_select )
+channel = url('/channels/{channel:cid}', handlers.channel_last, count=20 )
+channel_last = url('/channels/{channel:cid}/last/{count:int=100}/{type=}', handlers.channel_last )
+channel_link = url('/channels/{channel:cid}/link/{timestamp:ts}/?type=', handlers.channel_link )
+channel_calendar = url('/channels/{channel:cid}/calendar/{year:int=0}/{month:int=0}', handlers.channel_calendar )
+channel_date = url('/channels/{channel:cid}/date/{date:date}/?page:int=1&type=', handlers.channel_date )
+channel_search = url('/channels/{channel:cid}/search/?q=&page:int=1&max:int=1&type=&t:list=', handlers.channel_search )
+
+# mapper
+mapper = urltree.URLTree(urls)
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/qmsk/irclogs/utils.py Sun Sep 13 01:15:56 2009 +0300
@@ -0,0 +1,137 @@
+"""
+ Miscellaneous things
+"""
+
+import datetime, calendar, pytz
+import os, errno
+
+from qmsk.web.urltree import URLType
+
+class URLChannelName (URLType) :
+    """
+        Handle LogChannel names in URLs. Deals with instances of LogChannel
+    """
+
+    def __init__ (self, channels) :
+        """
+            Use the given { name -> LogChannel } dict
+        """
+
+        self.channels = channels
+
+    def parse (self, chan_name) :
+        """
+            chan_name -> LogChannel
+
+            Raises KeyError for an unknown channel name.
+        """
+
+        return self.channels[chan_name]
+
+    def build (self, chan) :
+        """
+            LogChannel -> chan_name
+        """
+
+        return chan.id
+
+class URLDateType (URLType) :
+    """
+        Handle dates in URLs as naive datetime objects (with indeterminate time info)
+    """
+
+    def __init__ (self, date_fmt) :
+        """
+            Format/parse dates using the given strftime/strptime format
+        """
+
+        self.date_fmt = date_fmt
+
+    def parse (self, date_str) :
+        """
+            date_str -> naive datetime.datetime
+
+            Raises ValueError if the string doesn't match date_fmt.
+        """
+
+        return datetime.datetime.strptime(date_str, self.date_fmt)
+
+    def build (self, date) :
+        """
+            datetime.date -> date_str
+        """
+
+        return date.strftime(self.date_fmt)
+
+class URLTimestampType (URLType) :
+    """
+        Handles an integer UNIX timestamp as an UTC datetime
+    """
+
+    def parse (self, timestamp_str) :
+        """
+            timestamp_str -> pytz.utc datetime.datetime
+
+            Raises ValueError if the string is not an integer.
+        """
+
+        return from_utc_timestamp(int(timestamp_str))
+
+    def build (self, dtz) :
+        """
+            pytz.utc datetime.datetime -> timestamp_str
+        """
+
+        return str(to_utc_timestamp(dtz))
+
+def from_utc_timestamp (timestamp) :
+    """
+        Converts a UNIX timestamp (int or float seconds) into a pytz.utc-aware datetime.datetime
+    """
+
+    return datetime.datetime.utcfromtimestamp(timestamp).replace(tzinfo=pytz.utc)
+
+def to_utc_timestamp (dt) :
+    """
+        Converts an aware datetime.datetime into an integer UNIX timestamp
+
+        utctimetuple() normalizes to UTC first, so any timezone-aware datetime works; sub-second precision is lost.
+    """
+
+    return calendar.timegm(dt.utctimetuple())
+
+def mtime (path, ignore_missing=False) :
+    """
+        Gets the mtime for the given path as an UTC datetime, or None, if the file doesn't exist and ignore_missing
+        is given as True. Other stat errors always propagate.
+    """
+
+    try :
+        # stat
+        st = os.stat(path)
+
+    # trap os.error (OSError) from the stat
+    except os.error, e :
+        # ENOENT?
+        if ignore_missing and e.errno == errno.ENOENT :
+            return None
+
+        else :
+            raise
+
+    else :
+        # decode the float st_mtime into an aware UTC datetime
+        return from_utc_timestamp(st.st_mtime)
+
+class FixedOffsetTimezone (pytz._FixedOffset) :
+    """
+        A Fixed-offset timezone with no DST info, compatible with pytz.
+
+        This is based on pytz._FixedOffset, but overrides dst() to return timedelta(0)
+
+        NOTE(review): pytz._FixedOffset is a private pytz API and could change between pytz releases — confirm
+        against the pinned pytz version.
+    """
+
+    def __init__ (self, minutes) :
+        """
+            Minutes is simply the offset from UTC in minutes, positive or negative, at most 24h.
+        """
+
+        pytz._FixedOffset.__init__(self, minutes)
+
+    def dst (self, dt) :
+        """
+            No DST info
+        """
+
+        return datetime.timedelta(0)
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/qmsk/irclogs/version.py Sun Sep 13 01:15:56 2009 +0300
@@ -0,0 +1,106 @@
+"""
+ Figuring out the project version
+
+ Currently this only supports mercurial
+"""
+
+# only load this once
+_VERSION = None
+
+def version_mercurial (path) :
+    """
+        Returns a (branch, tags, parents, modified) tuple for the given repo's working copy
+
+        branch      - local branch name, or None for the 'default' branch
+        tags        - list of tags on the working copy, excluding 'tip'
+        parents     - list of short hex node IDs of the working copy's parents
+        modified    - True if the working copy has local changes
+    """
+
+    global _VERSION
+
+    # cached?
+    # NOTE(review): the cache ignores 'path' — a second call with a different path returns the
+    # first repo's version; fine for a single-repo process, confirm otherwise
+    if _VERSION :
+        return _VERSION
+
+    # code adapted from mercurial.commands.identify
+    from mercurial import ui, hg, encoding
+    from mercurial.node import short
+
+    # open the repo
+    repo = hg.repository(ui.ui(), path)
+
+    # the working copy change context
+    ctx = repo[None]
+
+    # branch
+    branch = encoding.tolocal(ctx.branch())
+
+    # map default -> None
+    if branch == 'default' :
+        branch = None
+
+    # list of tags, without 'tip' tag
+    tags = [tag for tag in ctx.tags() if tag != 'tip']
+
+    # ctx's parents
+    parents = [short(p.node()) for p in ctx.parents()]
+
+    # local modifications?
+    modified = bool(ctx.files() + ctx.deleted())
+
+    # done
+    _VERSION = (branch, tags, parents, modified)
+    return _VERSION
+
+def version_string (path='.') :
+    """
+        Return a version string representing the version of the software at the given path.
+
+        Currently, this assumes that the given path points to a local Mercurial repo.
+    """
+
+    try :
+        # get info
+        branch, tags, parents, modified = version_mercurial(path)
+
+    except :
+        # XXX: ignore — this re-raise makes the try/except a no-op placeholder for future fallback handling
+        raise
+
+    # tags: <tag> [ "-" <tag> [ ... ]]
+    if tags :
+        return '-'.join(tags)
+
+    # revision: <parent> [ "+" <parent> [ ... ]] [ "+" ]
+    # trailing "+" marks local modifications
+    revision = '+'.join(p for p in parents) + ('+' if modified else '')
+
+    if branch :
+        # branch: "(" <branch> ")" <revision>
+        return '(%s)%s' % (branch, revision)
+
+    else :
+        # plain: <revision>
+        return revision
+
+def version_link_hg (hgweb_url, path='.') :
+    """
+        Returns an HTML snippet linking each revision component to its hgweb page for this version
+
+        hgweb_url   - base URL of the hgweb instance; "/rev/<rev>" is appended per revision
+        path        - local repo path, passed to version_mercurial
+    """
+
+    # URL for revision ID (tag/branch names are used as-is in the /rev/ URL, not resolved to hashes)
+    rev_url = lambda rev: '<a href="%(url)s/rev/%(rev)s">%(rev)s</a>' % dict(url=hgweb_url, rev=rev)
+
+    # get info
+    branch, tags, parents, modified = version_mercurial(path)
+
+    # tags: <tag> [ "-" <tag> [ ... ]] [ "+" ]
+    if tags :
+        return '-'.join(rev_url(tag) for tag in tags) + ('+' if modified else '')
+
+    # revision: <parent> [ "+" <parent> [ ... ]] [ "+" ]
+    revision = '+'.join(rev_url(p) for p in parents) + ('+' if modified else '')
+
+    if branch :
+        # branch: "(" <branch> ")" <revision> [ "+" ]
+        return '(%s)%s' % (rev_url(branch), revision) + ('+' if modified else '')
+
+    else :
+        # plain: <revision>
+        return revision
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/qmsk/irclogs/wsgi.py Sun Sep 13 01:15:56 2009 +0300
@@ -0,0 +1,31 @@
+"""
+ Our custom WSGI application
+"""
+
+from qmsk.web import wsgi
+
+import urls, error
+
+# our custom app with custom error() method
+class Application (wsgi.Application) :
+    """
+        WSGI application for the irclogs site: wsgi.Application wired up with our URL mapper and a custom
+        error page.
+    """
+
+    def __init__ (self) :
+        """
+            Construct wsgi.Application with our URLMapper
+        """
+
+        super(Application, self).__init__(urls.mapper)
+
+    def handle_error (self, exc_info, env, start_response) :
+        """
+            Use error.build_error and return that
+
+            Returns the error page body as the WSGI response iterable.
+        """
+
+        # get info
+        status, content_type, body = error.build_error(env=env)
+
+        # headers
+        start_response(status, [('Content-type', content_type)], exc_info)
+
+        # body
+        # NOTE(review): returning a bare str makes WSGI iterate it char-by-char; wrapping in a list would be
+        # more efficient — confirm qmsk.web doesn't already wrap it
+        return body
+
--- a/scripts/search-index Sun Sep 13 00:49:55 2009 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,640 +0,0 @@
-#!/usr/bin/env python2.5
-
-"""
- Tool for accessing the search index
-"""
-
-# XXX: fix path
-import sys; sys.path.insert(0, '.'); sys.path.insert(0, '..')
-
-import os, os.path, fcntl
-import datetime, pytz
-import optparse
-
-# configuration and the LogSearchIndex module
-import config, utils, log_search, channels
-
-def _open_index (options, open_mode) :
- """
- Opens the LogSearchIndex
- """
-
- return log_search.LogSearchIndex(config.LOG_CHANNELS, options.index_path, open_mode)
-
-
-def _open_index_and_channel (options, channel_name, open_mode) :
- """
- Opens+returns a LogSearchIndex and a LogChannel
- """
-
- # open the LogSearchIndex
- index = _open_index(options, open_mode)
-
- # open the channel
- channel = config.LOG_CHANNELS.lookup(channel_name)
-
- # return
- return index, channel
-
-def _iter_insert_stats (index, channel, lines) :
- """
- Insert the given lines into the index.
-
- Assumes the lines will be in time-order, and yields a series of (date, count) tuples for every date that lines
- are inserted for
- """
-
- # last date
- date = None
-
- # count
- count = 0
-
- # iter lines
- for line in lines :
- # next day?
- if not date or line.timestamp.date() != date :
- if date :
- # yield stats
- yield date, count
-
- # reset count
- count = 0
-
- # timestamp's date
- date = line.timestamp.date()
-
- # insert
- index.insert_line(channel, line)
-
- # count
- count += 1
-
- # final count?
- if date and count :
- yield date, count
-
-def _insert_lines (index, options, channel, lines) :
- """
- Insert the given lines into the index.
-
- Assumes the lines will be in time-order, and prints out as status messages the date and count for the inserted lines
- """
-
- # iterate insert stats
- for date, count in _iter_insert_stats(index, channel, lines) :
- # output date header?
- if not options.quiet :
- print "%s: %s" % (date.strftime('%Y-%m-%d'), count),
-
-def _load_channel_date (index, options, channel, date) :
- """
- Loads the logs for the given date from the channel's LogSource into the given LogSearchIndex
- """
-
- if not options.quiet :
- print "Loading date for channel %s" % channel.id
-
- try :
- # load lines for date
- lines = channel.source.get_date(date)
-
- except Exception, e :
- if not options.skip_missing :
- raise
-
- if not options.quiet :
- print "\tSkipped: %s" % (e, )
-
- else :
- # insert
- _insert_lines(index, options, channel, lines)
-
-def _parse_date (options, date_str, tz=None, fmt='%Y-%m-%d') :
- """
- Parse the given datetime, using the given timezone(defaults to options.tz) and format
- """
-
- # default tz
- if not tz :
- tz = options.timezone
-
- try :
- # parse
- return datetime.datetime.strptime(date_str, fmt).replace(tzinfo=tz)
-
- except Exception, e :
- raise CommandError("[ERROR] Invalid date: %s: %s" % (date_str, e))
-
-def _output_lines (options, lines) :
- """
- Display the formatted LogLines
- """
-
- # display as plaintext
- for line, txt_data in options.formatter.format_txt(lines, full_timestamps=True) :
- print txt_data
-
-class CommandError (Exception) :
- """
- Error with command-line arguments
- """
-
- pass
-
-def cmd_create (options) :
- """
- Creates a new index
- """
-
- # open index
- index = _open_index(options, 'ctrunc' if options.force else 'c')
-
- # that's all
- pass
-
-def cmd_load (options, channel_name, *dates) :
- """
- Loads the logs for a specific channel for the given dates (in terms of the channe logs' timezone) into the index
- """
-
- # open index/channel
- index, channel = _open_index_and_channel(options, channel_name, 'c' if options.create else 'a')
-
- # handle each date
- for date_str in dates :
- # prase date
- try :
- date = _parse_date(options, date_str, channel.source.tz)
-
- # handle errors
- except CommandError, e :
- if options.skip_missing :
- print "[ERROR] %s" % (date_name, e)
-
- else :
- raise
-
- # otherwise, load
- else :
- _load_channel_date(index, options, channel, date)
-
-def cmd_load_month (options, channel_name, *months) :
- """
- Loads the logs for a specific channel for the given months (in terms of the channel's timezone) into the index
- """
-
- # open index/channel
- index, channel = _open_index_and_channel(options, channel_name, 'c' if options.create else 'a')
-
- # handle each date
- for month_str in months :
- # prase date
- try :
- month = _parse_date(options, month_str, channel.source.tz, '%Y-%m')
-
- # handle errors
- except CommandError, e :
- # skip?
- if options.skip_missing :
- if not options.quiet :
- print "[ERROR] %s" % (date_name, e)
- continue
-
- else :
- raise
-
- # get the set of days
- days = list(channel.source.get_month_days(month))
-
- if not options.quiet :
- print "Loading %d days of logs:" % (len(days))
-
- # load each day
- for date in days :
- # convert to datetime
- dt = datetime.datetime.combine(date, datetime.time(0)).replace(tzinfo=channel.source.tz)
-
- # load
- _load_channel_date(index, options, channel, dt)
-
-def cmd_search (options, channel_name, query) :
- """
- Search the index for events on a specific channel with the given query
- """
-
- # sanity-check
- if options.create :
- raise Exception("--create doesn't make sense for 'search'")
-
- # open index/channel
- index, channel = _open_index_and_channel(options, channel_name, 'r')
-
- # search
- lines = index.search_simple(channel, query)
-
- # display
- _output_lines(options, lines)
-
-def cmd_list (options, channel_name, *dates) :
- """
- List the indexed events for a specific date
- """
-
- # sanity-check
- if options.create :
- raise Exception("--create doesn't make sense for 'search'")
-
- # open index/channel
- index, channel = _open_index_and_channel(options, channel_name, 'r')
-
- # ...for each date
- for date_str in dates :
- # parse date
- date = _parse_date(options, date_str)
-
- # list
- lines = index.list(channel, date)
-
- # display
- _output_lines(options, lines)
-
-def _autoload_reset (options, channels) :
- """
- Reset old autoload state
- """
-
- # warn
- if not options.quiet :
- print "[WARN] Resetting autoload state for: %s" % ', '.join(channel.id for channel in channels)
-
- # iter
- for channel in channels :
- # statefile path
- statefile_path = os.path.join(options.autoload_state_path, 'chan-%s' % channel.id)
-
- # is it present?
- if not os.path.exists(statefile_path) :
- if not options.quiet :
- print "[WARN] No statefile found at %s" % statefile_path
-
- else :
- if not options.quiet :
- print "\t%s: " % channel.id,
-
- # remove the statefile
- os.remove(statefile_path)
-
- if not options.quiet :
- print "OK"
-
-def cmd_autoload (options, *channel_names) :
- """
- Automatically loads all channel logs that have not been indexed yet (by logfile mtime)
- """
-
- # open index, nonblocking
- index = _open_index(options, 'c?' if options.create else 'a?')
-
- # default to all channels
- if not channel_names :
- channels = config.LOG_CHANNELS
-
- else :
- channels = [config.LOG_CHANNELS.lookup(channel_name) for channel_name in channel_names]
-
- # reset autoload state?
- if options.reset :
- _autoload_reset(options, channels)
- if not options.quiet :
- print
-
- # iterate channels
- for channel in channels :
- if not options.quiet :
- print "Channel %s:" % channel.id
-
- # no 'from' by default
- after = None
-
- # path to our state file
- statefile_path = os.path.join(options.autoload_state_path, 'chan-%s' % channel.id)
- statefile_tmppath = statefile_path + '.tmp'
-
- # does it exist?
- have_tmpfile = os.path.exists(statefile_tmppath)
-
- # do we have a tempfile from a previous crash?
- if have_tmpfile and not options.ignore_resume :
- # first, open it...
- statefile_tmp = open(statefile_tmppath, 'r+')
-
- # ... then lock it
- fcntl.lockf(statefile_tmp, fcntl.LOCK_EX | fcntl.LOCK_NB)
-
- # read after timestamp
- after_str = statefile_tmp.read().rstrip()
-
- if after_str :
- # parse timestamp
- after = utils.from_utc_timestamp(int(after_str))
-
- if not options.quiet :
- print "\tContinuing earlier progress from %s" % after
-
- else :
- # ignore
- if not options.quiet :
- print "\t[WARN] Ignoring empty temporary statefile"
-
- else :
- # warn about old tmpfile that was ignored
- if have_tmpfile and not options.quiet :
- print "\t[WARN] Ignoring old tmpfile state"
-
- # open new tempfile
- statefile_tmp = open(statefile_tmppath, 'w')
-
- # lock
- fcntl.lockf(statefile_tmp, fcntl.LOCK_EX | fcntl.LOCK_NB)
-
- # override?
- if options.reload :
- # load all
- mtime = None
-
- if not options.quiet :
- print "\tForcing reload!"
-
- # stat for mtime
- else :
- # stat for mtime, None if unknown
- mtime = utils.mtime(statefile_path, ignore_missing=True)
-
- if mtime and not options.quiet :
- print "\tLast load time was %s" % mtime
-
- elif not options.quiet :
- print "\t[WARN] No previous load state! Loading full logs"
-
- # only after some specific date?
- if options.after :
- # use unless read from tempfile
- if not after :
- after = options.after
-
- if not options.quiet :
- print "\tOnly including dates from %s onwards" % after
-
- else :
- if not options.quiet :
- print "\t[WARN] Ignoring --from because we found a tempfile"
-
- # only up to some specific date?
- if options.until :
- until = options.until
-
- if not options.quiet :
- print "\tOnly including dates up to (and including) %s" % until
- else :
- # default to now
- until = None
-
- # get lines
- lines = channel.source.get_modified(mtime, after, until)
-
- # insert
- if not options.quiet :
- print "\tLoading and inserting..."
- print
-
- # iterate insert() per day to display info and update progress
- for date, count in _iter_insert_stats(index, channel, lines) :
- # output date header?
- if not options.quiet :
- print "\t%10s: %d" % (date.strftime('%Y-%m-%d'), count)
-
- # write temp state
- statefile_tmp.seek(0)
- statefile_tmp.write(str(utils.to_utc_timestamp(datetime.datetime.combine(date, datetime.time(0)))))
- statefile_tmp.flush()
-
- # write autoload state
- open(statefile_path, 'w').close()
-
- # close+delete tempfile
- statefile_tmp.close()
- os.remove(statefile_tmppath)
-
- if not options.quiet :
- print
-
- # done
- return
-
-def cmd_help (options, *args) :
- """
- Help about commands
- """
-
- import inspect
-
- # general help stuff
- options._parser.print_help()
-
- # specific command?
- if args :
- # the command name
- command, = args
-
- # XXX: display info about specific command
- xxx
-
- # general
- else :
- print
- print "Available commands:"
-
- # build list of all cmd_* objects
- cmd_objects = [(name, obj) for name, obj in globals().iteritems() if name.startswith('cmd_') and inspect.isfunction(obj)]
-
- # sort alphabetically
- cmd_objects.sort()
-
- # iterate through all cmd_* objects
- for cmd_func_name, cmd_func in cmd_objects :
- # remove cmd_ prefix
- cmd_name = cmd_func_name[4:]
-
- # inspect
- cmd_args, cmd_varargs, cmd_varkw, cmd_default = inspect.getargspec(cmd_func)
- cmd_doc = inspect.getdoc(cmd_func)
-
- # remove the "options" arg
- cmd_args = cmd_args[1:]
-
- # display
- print "\t%10s %-30s : %s" % (cmd_name, inspect.formatargspec(cmd_args, cmd_varargs, None, cmd_default), cmd_doc)
-
-class MyOption (optparse.Option) :
- """
- Our custom types for optparse
- """
-
- def check_date (option, opt, value) :
- """
- Parse a date
- """
-
- try :
- # parse
- return datetime.datetime.strptime(value, '%Y-%m-%d')
-
- # trap -> OptionValueError
- except Exception, e :
- raise optparse.OptionValueError("option %s: invalid date value: %r" % (opt, value))
-
- def check_timezone (option, opt, value) :
- """
- Parse a timezone
- """
-
- try :
- # parse
- return pytz.timezone(value)
-
- # trap -> OptionValueError
- except Exception, e :
- raise optparse.OptionValueError("option %s: invalid timezone: %r" % (opt, value))
-
- def take_action (self, action, dest, opt, value, values, parser) :
- """
- Override take_action to handle date
- """
-
- if action == "parse_date" :
- # get timezone
- tz = values.timezone
-
- # set timezone
- value = value.replace(tzinfo=tz)
-
- # store
- return optparse.Option.take_action(self, 'store', dest, opt, value, values, parser)
-
- else :
- # default
- return optparse.Option.take_action(self, action, dest, opt, value, values, parser)
-
- TYPES = optparse.Option.TYPES + ('date', 'timezone')
- TYPE_CHECKER = optparse.Option.TYPE_CHECKER.copy()
- TYPE_CHECKER['date'] = check_date
- TYPE_CHECKER['timezone'] = check_timezone
- ACTIONS = optparse.Option.ACTIONS + ('parse_date', )
- STORE_ACTIONS = optparse.Option.STORE_ACTIONS + ('parse_date', )
- TYPED_ACTIONS = optparse.Option.TYPED_ACTIONS + ('parse_date', )
- ACTIONS = optparse.Option.ACTIONS + ('parse_date', )
-
-def main (argv) :
- """
- Command-line main, with given argv
- """
-
- # define parser
- parser = optparse.OptionParser(
- usage = "%prog [options] <command> [ ... ]",
- add_help_option = False,
- option_class = MyOption,
- )
-
- # general options # # # #
- general = optparse.OptionGroup(parser, "General Options")
- general.add_option('-h', "--help", dest="help", help="Show this help message and exit",
- action="store_true" )
-
- general.add_option( "--formatter", dest="formatter_name", help="LogFormatter to use",
- metavar="FMT", type="choice", default=config.PREF_FORMATTER_DEFAULT.name,
- choices=[fmt_name for fmt_name in config.LOG_FORMATTERS.iterkeys()] )
-
- general.add_option( "--index", dest="index_path", help="Index database path",
- metavar="PATH", default=config.SEARCH_INDEX_PATH )
-
- general.add_option( "--timezone", dest="timezone", help="Timezone for output",
- metavar="TZ", type="timezone", default=pytz.utc )
-
- general.add_option( "--force", dest="force", help="Force dangerous operation",
- action="store_true" )
-
- general.add_option( "--quiet", dest="quiet", help="Supress status messages",
- action="store_true" )
- parser.add_option_group(general)
-
-
- # cmd_load options # # # #
- load = optparse.OptionGroup(parser, "Load Options")
- load.add_option( "--skip-missing", dest="skip_missing", help="Skip missing logfiles",
- action="store_true" )
-
- load.add_option( "--create", dest="create", help="Create index database",
- action="store_true" )
- parser.add_option_group(load)
-
-
- # cmd_autoload options # # # #
- autoload = optparse.OptionGroup(parser, "Autoload Options")
- autoload.add_option( "--autoload-state", dest="autoload_state_path", help="Path to autoload state dir",
- metavar="PATH", default=config.SEARCH_AUTOINDEX_PATH)
-
- autoload.add_option( "--from", dest="after", help="Only autoload logfiles from the given date on",
- metavar="DATE", type="date", action="parse_date", default=None )
-
- autoload.add_option( "--until", dest="until", help="Only autoload logfiles up to (and including) the given date",
- metavar="DATE", type="date", action="parse_date", default=None )
-
- autoload.add_option( "--reload", dest="reload", help="Force reload lines",
- action="store_true" )
-
- autoload.add_option( "--reset", dest="reset", help="Reset old autload state",
- action="store_true" )
-
- autoload.add_option( "--ignore-resume", dest="ignore_resume", help="Do not try and resume interrupted autoload",
- action="store_true" )
- parser.add_option_group(autoload)
-
- # parse
- options, args = parser.parse_args(argv[1:])
-
- # postprocess stuff
- options._parser = parser
- options.formatter = config.LOG_FORMATTERS[options.formatter_name](options.timezone, "%H:%M:%S", None, None)
-
- # special-case --help
- if options.help :
- return cmd_help(options, *args)
-
- # must have at least the command argument
- if not args :
- raise CommandError("Missing command")
-
- # pop command
- command = args.pop(0)
-
- # get func
- func = globals().get('cmd_%s' % command)
-
- # unknown command?
- if not func :
- raise CommandError("Unknown command: %s" % command)
-
- # call
- func(options, *args)
-
-if __name__ == '__main__' :
- try :
- main(sys.argv)
- sys.exit(0)
-
- except CommandError, e :
- print e
- sys.exit(1)
-
--- a/urls.py Sun Sep 13 00:49:55 2009 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,46 +0,0 @@
-
-"""
- URL mapping for the irclogs.qmsk.net site
-"""
-
-# urltree stuff
-from qmsk.web import urltree
-
-# our own handlers
-import handlers
-
-# for types
-import utils
-
-# for configuration
-import config
-
-# our URLTypes
-types = dict(
- # LogChannel
- cid = utils.URLChannelName(config.LOG_CHANNELS.dict()),
-
- # datetime
- date = utils.URLDateType(config.URL_DATE_FMT),
-
- # UTC timestamp
- ts = utils.URLTimestampType(),
-)
-
-# our URLConfig
-urls = url = urltree.URLConfig(type_dict=types)
-
-# urls
-index = url('/', handlers.index )
-preferences = url('/preferences', handlers.preferences_ )
-channel_select = url('/channel_select/?channel:cid', handlers.channel_select )
-channel = url('/channels/{channel:cid}', handlers.channel_last, count=20 )
-channel_last = url('/channels/{channel:cid}/last/{count:int=100}/{type=}', handlers.channel_last )
-channel_link = url('/channels/{channel:cid}/link/{timestamp:ts}/?type=', handlers.channel_link )
-channel_calendar = url('/channels/{channel:cid}/calendar/{year:int=0}/{month:int=0}', handlers.channel_calendar )
-channel_date = url('/channels/{channel:cid}/date/{date:date}/?page:int=1&type=', handlers.channel_date )
-channel_search = url('/channels/{channel:cid}/search/?q=&page:int=1&max:int=1&type=&t:list=', handlers.channel_search )
-
-# mapper
-mapper = urltree.URLTree(urls)
-
--- a/utils.py Sun Sep 13 00:49:55 2009 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,137 +0,0 @@
-"""
- Miscellaneous things
-"""
-
-import datetime, calendar, pytz
-import os, errno
-
-from qmsk.web.urltree import URLType
-
-class URLChannelName (URLType) :
- """
- Handle LogChannel names in URLs. Deals with instances of LogChannel
- """
-
- def __init__ (self, channels) :
- """
- Use the given { name -> LogChannel } dict
- """
-
- self.channels = channels
-
- def parse (self, chan_name) :
- """
- chan_name -> LogChannel
- """
-
- return self.channels[chan_name]
-
- def build (self, chan) :
- """
- LogChannel -> chan_name
- """
-
- return chan.id
-
-class URLDateType (URLType) :
- """
- Handle dates in URLs as naive datetime objects (with indeterminate time info)
- """
-
- def __init__ (self, date_fmt) :
- """
- Format/parse dates using the given format
- """
-
- self.date_fmt = date_fmt
-
- def parse (self, date_str) :
- """
- date_str -> naive datetime.datetime
- """
-
- return datetime.datetime.strptime(date_str, self.date_fmt)
-
- def build (self, date) :
- """
- datetime.date -> date_str
- """
-
- return date.strftime(self.date_fmt)
-
-class URLTimestampType (URLType) :
- """
- Handles an integer UNIX timestamp as an UTC datetime
- """
-
- def parse (self, timestamp_str) :
- """
- timestamp_str -> pytz.utc datetime.datetime
- """
-
- return from_utc_timestamp(int(timestamp_str))
-
- def build (self, dtz) :
- """
- pytz.utc datetime.datetime -> timestamp_str
- """
-
- return str(to_utc_timestamp(dtz))
-
-def from_utc_timestamp (timestamp) :
- """
- Converts a UNIX timestamp into a datetime.datetime
- """
-
- return datetime.datetime.utcfromtimestamp(timestamp).replace(tzinfo=pytz.utc)
-
-def to_utc_timestamp (dt) :
- """
- Converts a datetime.datetime into a UNIX timestamp
- """
-
- return calendar.timegm(dt.utctimetuple())
-
-def mtime (path, ignore_missing=False) :
- """
- Gets the mtime for the given path as an UTC datetime, or None, if the file doesn't exist and ignore_missing
- """
-
- try :
- # stat
- st = os.stat(path)
-
- # trap IOError
- except os.error, e :
- # ENOENT?
- if ignore_missing and e.errno == errno.ENOENT :
- return None
-
- else :
- raise
-
- else :
- # decode
- return from_utc_timestamp(st.st_mtime)
-
-class FixedOffsetTimezone (pytz._FixedOffset) :
- """
- A Fixed-offset timezone with no DST info, compatible with pytz.
-
- This is based on pytz._FixedOffset, but overrides dst() to return timedelta(0)
- """
-
- def __init__ (self, minutes) :
- """
- Minutes is simply the offset from UTC in minutes, positive or negative, at most 24h.
- """
-
- pytz._FixedOffset.__init__(self, minutes)
-
- def dst (self, dt) :
- """
- No DST info
- """
-
- return datetime.timedelta(0)
-
--- a/version.py Sun Sep 13 00:49:55 2009 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,106 +0,0 @@
-"""
- Figuring out the project version
-
- Currently this only supports mercurial
-"""
-
-# only load this once
-_VERSION = None
-
-def version_mercurial (path) :
- """
- Returns a (branch, tags, parents, modified) tuple for the given repo's working copy
- """
-
- global _VERSION
-
- # cached?
- if _VERSION :
- return _VERSION
-
- # code adapted from mercurial.commands.identify
- from mercurial import ui, hg, encoding
- from mercurial.node import short
-
- # open the repo
- repo = hg.repository(ui.ui(), path)
-
- # the working copy change context
- ctx = repo[None]
-
- # branch
- branch = encoding.tolocal(ctx.branch())
-
- # map default -> None
- if branch == 'default' :
- branch = None
-
- # list of tags, without 'tip' tag
- tags = [tag for tag in ctx.tags() if tag != 'tip']
-
- # ctx's parents
- parents = [short(p.node()) for p in ctx.parents()]
-
- # local modifications?
- modified = bool(ctx.files() + ctx.deleted())
-
- # done
- _VERSION = (branch, tags, parents, modified)
- return _VERSION
-
-def version_string (path='.') :
- """
- Return a version string representing the version of the software at the given path.
-
- Currently, this assumes that the given path points to a local Mercurial repo.
- """
-
- try :
- # get info
- branch, tags, parents, modified = version_mercurial(path)
-
- except :
- # XXX: ignore
- raise
-
- # tags: <tag> [ "-" <tag> [ ... ]]
- if tags :
- return '-'.join(tags)
-
- # revision: <parent> [ "+" <parent> [ ... ]] [ "+" ]
- revision = '+'.join(p for p in parents) + ('+' if modified else '')
-
- if branch :
- # branch: "(" <branch> ")" <revision>
- return '(%s)%s' % (branch, revision)
-
- else :
- # plain: <revision>
- return revision
-
-def version_link_hg (hgweb_url, path='.') :
- """
- Returns a link to a hgweb page for this version
- """
-
- # URL for revision ID
- rev_url = lambda rev: '<a href="%(url)s/rev/%(rev)s">%(rev)s</a>' % dict(url=hgweb_url, rev=rev)
-
- # get info
- branch, tags, parents, modified = version_mercurial(path)
-
- # tags: <tag> [ "-" <tag> [ ... ]] [ "+" ]
- if tags :
- return '-'.join(rev_url(tag) for tag in tags) + ('+' if modified else '')
-
- # revision: <parent> [ "+" <parent> [ ... ]] [ "+" ]
- revision = '+'.join(rev_url(p) for p in parents) + ('+' if modified else '')
-
- if branch :
- # branch: "(" <branch> ")" <revision> [ "+" ]
- return '(%s)%s' % (rev_url(branch), revision) + ('+' if modified else '')
-
- else :
- # plain: <revision>
- return revision
-
--- a/wsgi.py Sun Sep 13 00:49:55 2009 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,31 +0,0 @@
-"""
- Our custom WSGI application
-"""
-
-from qmsk.web import wsgi
-
-import urls, error
-
-# our custom app with custom error() method
-class Application (wsgi.Application) :
- def __init__ (self) :
- """
- Construct wsgi.Application with our URLMapper
- """
-
- super(Application, self).__init__(urls.mapper)
-
- def handle_error (self, exc_info, env, start_response) :
- """
- Use error.build_error and return that
- """
-
- # get info
- status, content_type, body = error.build_error(env=env)
-
- # headers
- start_response(status, [('Content-type', content_type)], exc_info)
-
- # body
- return body
-