restructure into package format - the qmsk.* stuff doesn't work so well though, requires a symlink for qmsk.web to work...
--- a/__init__.py Sun Sep 13 00:49:55 2009 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,10 +0,0 @@
-"""
- The irclogs.qmsk.net site is an IRC log browser
-"""
-
-# the URL mapper
-import urls
-
-# our RequestHandler
-handler = urls.mapper
-
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/bin/index.cgi Sun Sep 13 01:15:56 2009 +0300
@@ -0,0 +1,49 @@
+#!/usr/bin/python2.5
+
+"""
+ CGI mode using qmsk.web.cgi
+"""
+
+def error () :
+ """
+ Dumps out a raw traceback of the current exception to stdout, call from except.
+
+ Used for low-level ImportError's
+ """
+
+ import sys
+
+ # if this import fails, we're doomed
+ from qmsk.irclogs import error
+
+ # format info
+ status, content_type, body = error.build_error()
+
+ # HTTP headers+body
+ sys.stdout.write('Status: %s\r\n' % status)
+ sys.stdout.write('Content-type: %s\r\n' % content_type)
+ sys.stdout.write('\r\n')
+ sys.stdout.write(body)
+
+def main () :
+ """
+ Build our wsgi.Application and run
+ """
+
+ try :
+ from qmsk.web import cgi_main
+ from qmsk.irclogs import wsgi
+
+ # create app
+ app = wsgi.Application()
+
+ # run once
+ cgi_main.run(app)
+
+ except :
+ # display error on stdout
+ error()
+
+if __name__ == '__main__' :
+ main()
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/bin/index.fcgi Sun Sep 13 01:15:56 2009 +0300
@@ -0,0 +1,26 @@
+#!/usr/bin/python2.5
+# :set filetype=py
+
+"""
+ FastCGI mode using qmsk.web.fastcgi_main
+"""
+
+from qmsk.web import fastcgi_main
+
+# XXX: error handling for imports? Lighttpd sucks hard at this
+from qmsk.irclogs import wsgi
+
+def main () :
+ """
+ Build our WSGIApplication and run
+ """
+
+ # create app
+ app = wsgi.Application()
+
+ # run once
+ fastcgi_main.run(app)
+
+if __name__ == '__main__' :
+ main()
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/bin/search-index Sun Sep 13 01:15:56 2009 +0300
@@ -0,0 +1,640 @@
+#!/usr/bin/env python2.5
+
+"""
+ Tool for accessing the search index
+"""
+
+# XXX: fix path
+import sys; sys.path.insert(0, '.'); sys.path.insert(0, '..')
+
+import os, os.path, fcntl
+import datetime, pytz
+import optparse
+
+# configuration and the LogSearchIndex module
+from qmsk.irclogs import config, utils, log_search, channels
+
+def _open_index (options, open_mode) :
+ """
+ Opens the LogSearchIndex
+ """
+
+ return log_search.LogSearchIndex(config.LOG_CHANNELS, options.index_path, open_mode)
+
+
+def _open_index_and_channel (options, channel_name, open_mode) :
+ """
+ Opens+returns a LogSearchIndex and a LogChannel
+ """
+
+ # open the LogSearchIndex
+ index = _open_index(options, open_mode)
+
+ # open the channel
+ channel = config.LOG_CHANNELS.lookup(channel_name)
+
+ # return
+ return index, channel
+
+def _iter_insert_stats (index, channel, lines) :
+ """
+ Insert the given lines into the index.
+
+ Assumes the lines will be in time-order, and yields a series of (date, count) tuples for every date that lines
+ are inserted for
+ """
+
+ # last date
+ date = None
+
+ # count
+ count = 0
+
+ # iter lines
+ for line in lines :
+ # next day?
+ if not date or line.timestamp.date() != date :
+ if date :
+ # yield stats
+ yield date, count
+
+ # reset count
+ count = 0
+
+ # timestamp's date
+ date = line.timestamp.date()
+
+ # insert
+ index.insert_line(channel, line)
+
+ # count
+ count += 1
+
+ # final count?
+ if date and count :
+ yield date, count
+
+def _insert_lines (index, options, channel, lines) :
+ """
+ Insert the given lines into the index.
+
+ Assumes the lines will be in time-order, and prints out as status messages the date and count for the inserted lines
+ """
+
+ # iterate insert stats
+ for date, count in _iter_insert_stats(index, channel, lines) :
+ # output date header?
+ if not options.quiet :
+ print "%s: %s" % (date.strftime('%Y-%m-%d'), count),
+
+def _load_channel_date (index, options, channel, date) :
+ """
+ Loads the logs for the given date from the channel's LogSource into the given LogSearchIndex
+ """
+
+ if not options.quiet :
+ print "Loading date for channel %s" % channel.id
+
+ try :
+ # load lines for date
+ lines = channel.source.get_date(date)
+
+ except Exception, e :
+ if not options.skip_missing :
+ raise
+
+ if not options.quiet :
+ print "\tSkipped: %s" % (e, )
+
+ else :
+ # insert
+ _insert_lines(index, options, channel, lines)
+
+def _parse_date (options, date_str, tz=None, fmt='%Y-%m-%d') :
+ """
+    Parse the given datetime, using the given timezone (defaults to options.timezone) and format
+ """
+
+ # default tz
+ if not tz :
+ tz = options.timezone
+
+ try :
+ # parse
+ return datetime.datetime.strptime(date_str, fmt).replace(tzinfo=tz)
+
+ except Exception, e :
+ raise CommandError("[ERROR] Invalid date: %s: %s" % (date_str, e))
+
+def _output_lines (options, lines) :
+ """
+ Display the formatted LogLines
+ """
+
+ # display as plaintext
+ for line, txt_data in options.formatter.format_txt(lines, full_timestamps=True) :
+ print txt_data
+
+class CommandError (Exception) :
+ """
+ Error with command-line arguments
+ """
+
+ pass
+
+def cmd_create (options) :
+ """
+ Creates a new index
+ """
+
+ # open index
+ index = _open_index(options, 'ctrunc' if options.force else 'c')
+
+ # that's all
+ pass
+
+def cmd_load (options, channel_name, *dates) :
+ """
+    Loads the logs for a specific channel for the given dates (in terms of the channel logs' timezone) into the index
+ """
+
+ # open index/channel
+ index, channel = _open_index_and_channel(options, channel_name, 'c' if options.create else 'a')
+
+ # handle each date
+ for date_str in dates :
+        # parse date
+ try :
+ date = _parse_date(options, date_str, channel.source.tz)
+
+ # handle errors
+ except CommandError, e :
+ if options.skip_missing :
+ print "[ERROR] %s" % (date_name, e)
+
+ else :
+ raise
+
+ # otherwise, load
+ else :
+ _load_channel_date(index, options, channel, date)
+
+def cmd_load_month (options, channel_name, *months) :
+ """
+ Loads the logs for a specific channel for the given months (in terms of the channel's timezone) into the index
+ """
+
+ # open index/channel
+ index, channel = _open_index_and_channel(options, channel_name, 'c' if options.create else 'a')
+
+ # handle each date
+ for month_str in months :
+        # parse date
+ try :
+ month = _parse_date(options, month_str, channel.source.tz, '%Y-%m')
+
+ # handle errors
+ except CommandError, e :
+ # skip?
+ if options.skip_missing :
+ if not options.quiet :
+ print "[ERROR] %s" % (date_name, e)
+ continue
+
+ else :
+ raise
+
+ # get the set of days
+ days = list(channel.source.get_month_days(month))
+
+ if not options.quiet :
+ print "Loading %d days of logs:" % (len(days))
+
+ # load each day
+ for date in days :
+ # convert to datetime
+ dt = datetime.datetime.combine(date, datetime.time(0)).replace(tzinfo=channel.source.tz)
+
+ # load
+ _load_channel_date(index, options, channel, dt)
+
+def cmd_search (options, channel_name, query) :
+ """
+ Search the index for events on a specific channel with the given query
+ """
+
+ # sanity-check
+ if options.create :
+ raise Exception("--create doesn't make sense for 'search'")
+
+ # open index/channel
+ index, channel = _open_index_and_channel(options, channel_name, 'r')
+
+ # search
+ lines = index.search_simple(channel, query)
+
+ # display
+ _output_lines(options, lines)
+
+def cmd_list (options, channel_name, *dates) :
+ """
+ List the indexed events for a specific date
+ """
+
+ # sanity-check
+ if options.create :
+ raise Exception("--create doesn't make sense for 'search'")
+
+ # open index/channel
+ index, channel = _open_index_and_channel(options, channel_name, 'r')
+
+ # ...for each date
+ for date_str in dates :
+ # parse date
+ date = _parse_date(options, date_str)
+
+ # list
+ lines = index.list(channel, date)
+
+ # display
+ _output_lines(options, lines)
+
+def _autoload_reset (options, channels) :
+ """
+ Reset old autoload state
+ """
+
+ # warn
+ if not options.quiet :
+ print "[WARN] Resetting autoload state for: %s" % ', '.join(channel.id for channel in channels)
+
+ # iter
+ for channel in channels :
+ # statefile path
+ statefile_path = os.path.join(options.autoload_state_path, 'chan-%s' % channel.id)
+
+ # is it present?
+ if not os.path.exists(statefile_path) :
+ if not options.quiet :
+ print "[WARN] No statefile found at %s" % statefile_path
+
+ else :
+ if not options.quiet :
+ print "\t%s: " % channel.id,
+
+ # remove the statefile
+ os.remove(statefile_path)
+
+ if not options.quiet :
+ print "OK"
+
+def cmd_autoload (options, *channel_names) :
+ """
+ Automatically loads all channel logs that have not been indexed yet (by logfile mtime)
+ """
+
+ # open index, nonblocking
+ index = _open_index(options, 'c?' if options.create else 'a?')
+
+ # default to all channels
+ if not channel_names :
+ channels = config.LOG_CHANNELS
+
+ else :
+ channels = [config.LOG_CHANNELS.lookup(channel_name) for channel_name in channel_names]
+
+ # reset autoload state?
+ if options.reset :
+ _autoload_reset(options, channels)
+ if not options.quiet :
+ print
+
+ # iterate channels
+ for channel in channels :
+ if not options.quiet :
+ print "Channel %s:" % channel.id
+
+ # no 'from' by default
+ after = None
+
+ # path to our state file
+ statefile_path = os.path.join(options.autoload_state_path, 'chan-%s' % channel.id)
+ statefile_tmppath = statefile_path + '.tmp'
+
+ # does it exist?
+ have_tmpfile = os.path.exists(statefile_tmppath)
+
+ # do we have a tempfile from a previous crash?
+ if have_tmpfile and not options.ignore_resume :
+ # first, open it...
+ statefile_tmp = open(statefile_tmppath, 'r+')
+
+ # ... then lock it
+ fcntl.lockf(statefile_tmp, fcntl.LOCK_EX | fcntl.LOCK_NB)
+
+ # read after timestamp
+ after_str = statefile_tmp.read().rstrip()
+
+ if after_str :
+ # parse timestamp
+ after = utils.from_utc_timestamp(int(after_str))
+
+ if not options.quiet :
+ print "\tContinuing earlier progress from %s" % after
+
+ else :
+ # ignore
+ if not options.quiet :
+ print "\t[WARN] Ignoring empty temporary statefile"
+
+ else :
+ # warn about old tmpfile that was ignored
+ if have_tmpfile and not options.quiet :
+ print "\t[WARN] Ignoring old tmpfile state"
+
+ # open new tempfile
+ statefile_tmp = open(statefile_tmppath, 'w')
+
+ # lock
+ fcntl.lockf(statefile_tmp, fcntl.LOCK_EX | fcntl.LOCK_NB)
+
+ # override?
+ if options.reload :
+ # load all
+ mtime = None
+
+ if not options.quiet :
+ print "\tForcing reload!"
+
+ # stat for mtime
+ else :
+ # stat for mtime, None if unknown
+ mtime = utils.mtime(statefile_path, ignore_missing=True)
+
+ if mtime and not options.quiet :
+ print "\tLast load time was %s" % mtime
+
+ elif not options.quiet :
+ print "\t[WARN] No previous load state! Loading full logs"
+
+ # only after some specific date?
+ if options.after :
+ # use unless read from tempfile
+ if not after :
+ after = options.after
+
+ if not options.quiet :
+ print "\tOnly including dates from %s onwards" % after
+
+ else :
+ if not options.quiet :
+ print "\t[WARN] Ignoring --from because we found a tempfile"
+
+ # only up to some specific date?
+ if options.until :
+ until = options.until
+
+ if not options.quiet :
+ print "\tOnly including dates up to (and including) %s" % until
+ else :
+ # default to now
+ until = None
+
+ # get lines
+ lines = channel.source.get_modified(mtime, after, until)
+
+ # insert
+ if not options.quiet :
+ print "\tLoading and inserting..."
+ print
+
+ # iterate insert() per day to display info and update progress
+ for date, count in _iter_insert_stats(index, channel, lines) :
+ # output date header?
+ if not options.quiet :
+ print "\t%10s: %d" % (date.strftime('%Y-%m-%d'), count)
+
+ # write temp state
+ statefile_tmp.seek(0)
+ statefile_tmp.write(str(utils.to_utc_timestamp(datetime.datetime.combine(date, datetime.time(0)))))
+ statefile_tmp.flush()
+
+ # write autoload state
+ open(statefile_path, 'w').close()
+
+ # close+delete tempfile
+ statefile_tmp.close()
+ os.remove(statefile_tmppath)
+
+ if not options.quiet :
+ print
+
+ # done
+ return
+
+def cmd_help (options, *args) :
+ """
+ Help about commands
+ """
+
+ import inspect
+
+ # general help stuff
+ options._parser.print_help()
+
+ # specific command?
+ if args :
+ # the command name
+ command, = args
+
+ # XXX: display info about specific command
+ xxx
+
+ # general
+ else :
+ print
+ print "Available commands:"
+
+ # build list of all cmd_* objects
+ cmd_objects = [(name, obj) for name, obj in globals().iteritems() if name.startswith('cmd_') and inspect.isfunction(obj)]
+
+ # sort alphabetically
+ cmd_objects.sort()
+
+ # iterate through all cmd_* objects
+ for cmd_func_name, cmd_func in cmd_objects :
+ # remove cmd_ prefix
+ cmd_name = cmd_func_name[4:]
+
+ # inspect
+ cmd_args, cmd_varargs, cmd_varkw, cmd_default = inspect.getargspec(cmd_func)
+ cmd_doc = inspect.getdoc(cmd_func)
+
+ # remove the "options" arg
+ cmd_args = cmd_args[1:]
+
+ # display
+ print "\t%10s %-30s : %s" % (cmd_name, inspect.formatargspec(cmd_args, cmd_varargs, None, cmd_default), cmd_doc)
+
+class MyOption (optparse.Option) :
+ """
+ Our custom types for optparse
+ """
+
+ def check_date (option, opt, value) :
+ """
+ Parse a date
+ """
+
+ try :
+ # parse
+ return datetime.datetime.strptime(value, '%Y-%m-%d')
+
+ # trap -> OptionValueError
+ except Exception, e :
+ raise optparse.OptionValueError("option %s: invalid date value: %r" % (opt, value))
+
+ def check_timezone (option, opt, value) :
+ """
+ Parse a timezone
+ """
+
+ try :
+ # parse
+ return pytz.timezone(value)
+
+ # trap -> OptionValueError
+ except Exception, e :
+ raise optparse.OptionValueError("option %s: invalid timezone: %r" % (opt, value))
+
+ def take_action (self, action, dest, opt, value, values, parser) :
+ """
+ Override take_action to handle date
+ """
+
+ if action == "parse_date" :
+ # get timezone
+ tz = values.timezone
+
+ # set timezone
+ value = value.replace(tzinfo=tz)
+
+ # store
+ return optparse.Option.take_action(self, 'store', dest, opt, value, values, parser)
+
+ else :
+ # default
+ return optparse.Option.take_action(self, action, dest, opt, value, values, parser)
+
+ TYPES = optparse.Option.TYPES + ('date', 'timezone')
+ TYPE_CHECKER = optparse.Option.TYPE_CHECKER.copy()
+ TYPE_CHECKER['date'] = check_date
+ TYPE_CHECKER['timezone'] = check_timezone
+ ACTIONS = optparse.Option.ACTIONS + ('parse_date', )
+ STORE_ACTIONS = optparse.Option.STORE_ACTIONS + ('parse_date', )
+ TYPED_ACTIONS = optparse.Option.TYPED_ACTIONS + ('parse_date', )
+ ACTIONS = optparse.Option.ACTIONS + ('parse_date', )
+
+def main (argv) :
+ """
+ Command-line main, with given argv
+ """
+
+ # define parser
+ parser = optparse.OptionParser(
+ usage = "%prog [options] <command> [ ... ]",
+ add_help_option = False,
+ option_class = MyOption,
+ )
+
+ # general options # # # #
+ general = optparse.OptionGroup(parser, "General Options")
+ general.add_option('-h', "--help", dest="help", help="Show this help message and exit",
+ action="store_true" )
+
+ general.add_option( "--formatter", dest="formatter_name", help="LogFormatter to use",
+ metavar="FMT", type="choice", default=config.PREF_FORMATTER_DEFAULT.name,
+ choices=[fmt_name for fmt_name in config.LOG_FORMATTERS.iterkeys()] )
+
+ general.add_option( "--index", dest="index_path", help="Index database path",
+ metavar="PATH", default=config.SEARCH_INDEX_PATH )
+
+ general.add_option( "--timezone", dest="timezone", help="Timezone for output",
+ metavar="TZ", type="timezone", default=pytz.utc )
+
+ general.add_option( "--force", dest="force", help="Force dangerous operation",
+ action="store_true" )
+
+ general.add_option( "--quiet", dest="quiet", help="Supress status messages",
+ action="store_true" )
+ parser.add_option_group(general)
+
+
+ # cmd_load options # # # #
+ load = optparse.OptionGroup(parser, "Load Options")
+ load.add_option( "--skip-missing", dest="skip_missing", help="Skip missing logfiles",
+ action="store_true" )
+
+ load.add_option( "--create", dest="create", help="Create index database",
+ action="store_true" )
+ parser.add_option_group(load)
+
+
+ # cmd_autoload options # # # #
+ autoload = optparse.OptionGroup(parser, "Autoload Options")
+ autoload.add_option( "--autoload-state", dest="autoload_state_path", help="Path to autoload state dir",
+ metavar="PATH", default=config.SEARCH_AUTOINDEX_PATH)
+
+ autoload.add_option( "--from", dest="after", help="Only autoload logfiles from the given date on",
+ metavar="DATE", type="date", action="parse_date", default=None )
+
+ autoload.add_option( "--until", dest="until", help="Only autoload logfiles up to (and including) the given date",
+ metavar="DATE", type="date", action="parse_date", default=None )
+
+ autoload.add_option( "--reload", dest="reload", help="Force reload lines",
+ action="store_true" )
+
+ autoload.add_option( "--reset", dest="reset", help="Reset old autload state",
+ action="store_true" )
+
+ autoload.add_option( "--ignore-resume", dest="ignore_resume", help="Do not try and resume interrupted autoload",
+ action="store_true" )
+ parser.add_option_group(autoload)
+
+ # parse
+ options, args = parser.parse_args(argv[1:])
+
+ # postprocess stuff
+ options._parser = parser
+ options.formatter = config.LOG_FORMATTERS[options.formatter_name](options.timezone, "%H:%M:%S", None, None)
+
+ # special-case --help
+ if options.help :
+ return cmd_help(options, *args)
+
+ # must have at least the command argument
+ if not args :
+ raise CommandError("Missing command")
+
+ # pop command
+ command = args.pop(0)
+
+ # get func
+ func = globals().get('cmd_%s' % command)
+
+ # unknown command?
+ if not func :
+ raise CommandError("Unknown command: %s" % command)
+
+ # call
+ func(options, *args)
+
+if __name__ == '__main__' :
+ try :
+ main(sys.argv)
+ sys.exit(0)
+
+ except CommandError, e :
+ print e
+ sys.exit(1)
+
--- a/channels.py Sun Sep 13 00:49:55 2009 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,38 +0,0 @@
-"""
- Our list of LogChannels
-"""
-
-class ChannelList (object) :
- """
- The list of channels, and related methods
- """
-
-
- def __init__ (self, channel_list) :
- """
- Initialize with the given channel dict
- """
-
- self.channel_list = channel_list
- self.channel_dict = dict((channel.id, channel) for channel in channel_list)
-
- def lookup (self, channel_name) :
- """
- Looks up the LogChannel for the given name
- """
-
- return self.channel_dict[channel_name]
-
- def dict (self) :
- """
- Returns a { name: LogChannel } dict
- """
- return self.channel_dict
-
- def __iter__ (self) :
- """
- Iterate over our defined LogChannel objects
- """
-
- return iter(self.channel_list)
-
--- a/config.py Sun Sep 13 00:49:55 2009 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,112 +0,0 @@
-"""
- Configureable defaults
-"""
-
-import os.path, pytz
-from log_parser import IrssiParser
-from log_channel import LogChannel
-from log_source import LogSourceDecoder, LogDirectory
-from log_formatter import IrssiFormatter, DebugFormatter
-from channels import ChannelList
-import log_formatter
-
-# build relative paths to the dir containing this file
-relpath = lambda path : os.path.join(os.path.dirname(__file__), path)
-
-### ###
-### Configuration ###
-### ###
-
-# timezone to use for logs
-LOG_TIMEZONE = pytz.timezone('Europe/Helsinki')
-
-# timestamp format for logfiles
-LOG_TIMESTAMP_FMT = '%H:%M:%S'
-
-# the decoder used for logfiles
-LOG_DECODER = LogSourceDecoder((
- ('utf-8', 'strict'),
- ('latin-1', 'replace'),
-))
-
-# log filename format
-LOG_FILENAME_FMT = '%Y-%m-%d'
-
-# the log parser that we use
-LOG_PARSER = IrssiParser(LOG_TIMEZONE, LOG_TIMESTAMP_FMT)
-#LOG_PARSER_FULLTS = IrssiParser(LOG_TIMEZONE, '%Y%m%d%H%M%S')
-
-# the statically defined channel list
-LOG_CHANNELS = ChannelList([
- LogChannel('tycoon', "OFTC", "#tycoon",
- LogDirectory(relpath('/home/spbot/irclogs/tycoon'), LOG_TIMEZONE, LOG_PARSER, LOG_DECODER, LOG_FILENAME_FMT)
- ),
-
- LogChannel('openttd', "OFTC", "#openttd",
- LogDirectory(relpath('/home/spbot/irclogs/openttd'), LOG_TIMEZONE, LOG_PARSER, LOG_DECODER, LOG_FILENAME_FMT)
- ),
-
- LogChannel('test', "TEST", "#test",
- LogDirectory(relpath('/home/spbot/irclogs/test'), LOG_TIMEZONE, LOG_PARSER, LOG_DECODER, LOG_FILENAME_FMT)
- )
-])
-
-# URL to the hgweb installation for this code
-HGWEB_URL = "http://hg.qmsk.net/irclogs2"
-
-# path to the mercurial working copy containing this code
-HG_WC_PATH = relpath(".")
-
-# how to handle decode() errors for logfile lines
-LOG_SOURCE_DECODE_ERRORS = 'replace'
-
-# date format for URLs
-URL_DATE_FMT = '%Y-%m-%d'
-
-# month name format
-MONTH_FMT = '%B %Y'
-
-# timezone name format
-TIMEZONE_FMT = '%Z %z'
-
-# TTF fonts to use for drawing images
-FORMATTER_IMAGE_FONTS = {
- # XXX: no unicode support
- # 'default': (None, "Ugly default font" ),
- 'ttf-dejavu-mono': ("/usr/share/fonts/truetype/ttf-dejavu/DejaVuSansMono.ttf", "DejaVu Sans Mono" ),
- 'ttf-liberation-mono': ("/usr/share/fonts/truetype/ttf-liberation/LiberationMono-Regular.ttf", "Liberation Mono Regular" )
-}
-
-# available formatters
-LOG_FORMATTERS = {
- 'irssi': IrssiFormatter,
- 'debug': DebugFormatter,
-}
-
-# Cookie settings
-PREF_COOKIE_PATH = '/'
-PREF_COOKIE_EXPIRE_SECONDS = 1 * 365 * 24 * 60 * 60 # one year
-
-# default preferences
-PREF_TIME_FMT_DEFAULT = '%H:%M:%S'
-PREF_DATE_FMT_DEFAULT = '%Y-%m-%d'
-PREF_TIMEZONE_FALLBACK = pytz.utc
-PREF_FORMATTER_DEFAULT = IrssiFormatter
-PREF_COUNT_DEFAULT = 200
-PREF_COUNT_MAX = None
-PREF_IMAGE_FONT_DEFAULT = 'ttf-dejavu-mono'
-PREF_IMAGE_FONT_SIZE_DEFAULT = 12
-PREF_IMAGE_FONT_SIZE_MAX = 32
-
-# search line count options
-SEARCH_LINE_COUNT_OPTIONS = (
- (50, 50),
- (100, 100),
- (200, 200),
- (None, "∞"),
-)
-
-# search index database path
-SEARCH_INDEX_PATH = '/home/spbot/irclogs/search-index'
-SEARCH_AUTOINDEX_PATH = '/home/spbot/irclogs/search-autoindex'
-
--- a/error.py Sun Sep 13 00:49:55 2009 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,183 +0,0 @@
-"""
- Build error messages
-"""
-
-import traceback, sys, cgi, urllib
-
-def truncate (msg, limit) :
- """
- Truncate the given message to <limit> chars
- """
-
- if len(msg) > limit :
- return msg[:limit-3] + '...'
-
- else :
- return msg
-
-def build_link (title, url) :
- return '<a href="%s">%s</a>' % (cgi.escape(url, True), cgi.escape(title))
-
-def build_error (exc_info=None, env=None) :
- """
- Dumps out a raw traceback of the given/current exception to stdout.
-
- If request_env, it should be a environment dict, like under WSGI, and will be used to display additional info
- about the request.
-
- Returns a (status, content-type, body) tuple, with all components being non-unicode strs.
- """
-
- # default for exc_info is current exception
- if not exc_info :
- exc_info = sys.exc_info()
-
- # request URL?
- if env :
- try :
- from qmsk.web.http import request_url
-
- url = request_url(env)
-
- except :
- # ignore
- url = None
- else :
- url = None
-
- # working copy path?
- try :
- from config import HG_WC_PATH, HGWEB_URL
-
- wc_path = HG_WC_PATH
- hgweb_url = HGWEB_URL
-
- except :
- # a good guess
- wc_path = '.'
- hgweb_url = None
-
- # version?
- try :
- from version import version_string, version_link_hg
-
- version = version_string(wc_path)
-
- if hgweb_url :
- version_href = version_link_hg(hgweb_url, wc_path)
-
- else :
- version_href = None
-
- except :
- version = None
- version_href = None
-
- # the exception type
- exception_str = traceback.format_exception_only(*exc_info[:2])[-1]
-
- # the exception traceback
- traceback_lines = traceback.format_exception(*exc_info)
-
- # XXX: make this configureable
- trac_url = "http://projects.qmsk.net/irclogs2/trac"
-
- # ticket list
- trac_query = build_link("All tickets", "%s/query" % trac_url)
-
- # submit ticket
- submit_args = dict(type='defect')
-
- # handle optional components
- if url :
- submit_args['url'] = url
- trac_query_url = build_link("Same URL", "%s/query?url=%s" % (trac_url, urllib.quote(url)))
- else :
- trac_query_url = ""
-
- if version :
- submit_args['revision'] = version
- trac_query_version = build_link("Same version", "%s/query?revision=%s" % (trac_url, urllib.quote(version)))
-
- else :
- trac_query_version = ""
-
- if exception_str :
- submit_args['summary'] = truncate(exception_str, 140)
- trac_query_err = build_link("Same error", "%s/query?summary=%s" % (trac_url, urllib.quote(exception_str.rstrip())))
-
- else :
- trac_query_err = ""
-
- if traceback_lines :
- # this is big
- submit_args['description'] = """\
-[Insert any additional information here]
-
-
-= Traceback =
-{{{
-%s
-}}}""" % ''.join(traceback_lines)
-
- # the trac newticket URL
- submit_url = "%s/newticket?%s" % (trac_url, '&'.join('%s=%s' % (urllib.quote(k), urllib.quote(v)) for k, v in submit_args.iteritems()))
-
- # return
- return ('500 Internal Server Error', 'text/html; charset=UTF-8', ("""\
-<html><head><title>500 Internal Server Error</title></head><body>
-<h1>Oops!</h1>
-<p>
- An error occured, which was not logged, and was not reported to anybody. It might be your fault, or it might be mine.
-</p>
-
-<p>
- You can try:
- <ol style="list-style-type: lower-alpha">
- <li><strong>Poking</strong> the administrator of this site to see if they respond</li>
- <li><strong>Looking</strong> for similar issue tickets with:
- <ul>
- <li>%(trac_query)s</li>
- <li>%(trac_query_url)s</li>
- <li>%(trac_query_version)s</li>
- <li>%(trac_query_err)s</li>
- </ul>
- </li>
- <li><strong>Submitting</strong> a new ticket using the following link (quick & easy):</li>
- </ol>
-</p>
-<pre>
- <a href="%(submit_url)s">%(submit_url_short)s</a>
-</pre>
-
-<h2>Details:</h2>
-<p>The page you tried to request was:</p>
-<pre>
- %(url)s
-</pre>
-
-<p>The software version is:</p>
-<pre>
- %(version_link)s
-</pre>
-
-<p>The error was:</p>
-<pre>
- %(exception)s
-</pre>
-
-<p>The traceback was:</p>
-<pre>%(traceback)s</pre>
-</body></html>""" % dict(
- url = url if url else 'Unknown',
- version_link = version_href if version_href else 'Unknown',
- exception = truncate(exception_str, 512),
- traceback = cgi.escape(''.join(' ' + line for line in traceback_lines)),
- trac_query = trac_query,
- trac_query_url = trac_query_url,
- trac_query_version = trac_query_version,
- trac_query_err = trac_query_err,
- submit_url = submit_url,
- submit_url_short = truncate(submit_url, 120)
- )).encode('utf-8'))
-
--- a/handlers.py Sun Sep 13 00:49:55 2009 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,292 +0,0 @@
-"""
- Our URL action handlers
-"""
-
-import datetime, calendar, pytz
-
-from qmsk.web import http, template
-
-import urls, channels, helpers
-import preferences as prefs
-from preferences import preferences
-import config, log_search
-
-# load templates from here
-templates = template.TemplateLoader("templates",
- _helper_class = helpers.Helpers,
- urls = urls,
- channel_list = config.LOG_CHANNELS,
- config = config,
-)
-
-# return a http.Response for the given text in the given format
-def _render_type (request, channel, lines, type, full_timestamps=False) :
- """
- Render the given LogLines as a http.Response in the given format, which is one of:
- html - XXX: not supported
- txt - Plaintext
- png - PNG image
- rss - RSS feed
- """
-
- # load related preferences
- formatter = request.prefs['formatter']
-
- kwargs = dict(
- full_timestamps = full_timestamps
- )
-
- # we can render in various modes...
- if type in ('html', None) :
- xxx
-
- elif type == 'txt' :
- # plaintext
- lines = formatter.format_txt(lines, **kwargs)
-
- # build data
- data = '\n'.join(data for line, data in lines)
-
- return http.Response(data, 'text/plain')
-
- elif type == 'png' :
- # PNG image
- png_data = formatter.format_png(lines, **kwargs)
-
- return http.Response(png_data, 'image/png', charset=None)
-
- elif type == 'rss' :
- # RSS feed
- rss_data = formatter.format_rss(lines, **kwargs)
-
- # XXX: fix to render as unicode?
- return http.Response(rss_data, 'application/rss+xml', charset=None)
-
- else :
- raise http.ResponseError("Unrecognized type: %r" % (type, ))
-
-def _render_date (request, channel, date, lines, type, count, page, max) :
- """
- Render the given LogLines as a http.Response for channel_date
- """
-
- # type?
- if type :
- # special type
- return _render_type(request, channel, lines, type)
-
- else :
- # format HTML
- lines = request.prefs['formatter'].format_html(lines)
-
- # render
- return templates.render_to_response("channel_date",
- req = request,
- prefs = request.prefs,
- channel = channel,
- date = date,
- count = count,
- page = page,
- max = max,
- lines = lines,
-
- # for prev/next date
- date_next = channel.source.get_next_date(date),
- date_prev = channel.source.get_prev_date(date),
- )
-
-@preferences.handler()
-def index (request) :
- """
- The topmost index page, display a list of available channels, perhaps some general stats
- """
-
- return templates.render_to_response("index",
- req = request,
- prefs = request.prefs,
- )
-
-# XXX: fix this namespace crap
-@preferences.handler()
-def preferences_ (request) :
- """
- Preferences editor
- """
-
- # POST?
- if request.is_post() :
- # update any modified preferences
- for pref in preferences.pref_list :
- # get the POST'd value, default = None
- post_value = request.get_post(pref.name, None)
-
- # skip non-specified values
- # XXX: this is to not clobber timezone_offset to None
- if post_value is None :
- continue
-
- # parse the POST'd value, None -> default
- new_value = request.prefs.parse(pref, post_value)
-
- # update if given and changed
- if new_value != request.prefs[pref] :
- request.prefs.set(pref.name, new_value)
-
- # render
- return templates.render_to_response("preferences",
- req = request,
- prefs = request.prefs,
- preferences = prefs,
- )
-
-def channel_select (request, channel) :
- """
- Redirect to the appropriate channel_view
- """
-
- return http.Redirect(urls.channel.build(request, channel=channel))
-
-@preferences.handler(prefs.formatter)
-def channel_last (request, channel, count, formatter, type=None) :
- """
- The main channel view page, displaying the most recent lines
- """
-
- # get latest events
- lines = channel.source.get_latest(count)
-
- # type?
- if type :
- # other format
- return _render_type(request, channel, lines, type)
-
- else :
- # format HTML
- lines = formatter.format_html(lines)
-
- # render page
- return templates.render_to_response("channel_last",
- req = request,
- prefs = request.prefs,
- channel = channel,
- count = count,
- lines = lines,
- )
-
-@preferences.handler(prefs.formatter, prefs.timezone, prefs.count)
-def channel_link (request, channel, timestamp, formatter, timezone, count, type=None) :
- """
- Display channel_date for specific UTC timestamp
- """
-
- # convert timestamp to user's timezone
- timestamp = timestamp.astimezone(timezone)
-
- # get correct day's correct page of lines
- page, max, lines = channel.source.get_date_paged(timestamp, count)
-
- # render channel_date
- return _render_date (request, channel, timestamp, lines, type, count, page, max)
-
-@preferences.handler(prefs.timezone)
-def channel_calendar (request, channel, year, month, timezone) :
- """
- Display a list of avilable logs for some month
- """
-
- # current date as default
- now = timezone.localize(datetime.datetime.now())
-
- # target year/month
- target = timezone.localize(datetime.datetime(
- year = year if year else now.year,
- month = month if month else now.month,
- day = 1
- ))
-
- # display calendar
- return templates.render_to_response("channel_calendar",
- req = request,
- prefs = request.prefs,
- channel = channel,
- month = target,
- )
-
-@preferences.handler(prefs.count, prefs.timezone)
-def channel_date (request, channel, date, count, timezone, page=1, type=None) :
- """
- Display all log data for the given date
- """
-
- # convert date to user's timezone
- date = timezone.localize(date)
-
-# print
-# print "channel_date: date=%s" % date
-
- # get that day's events, either paged or not
- if page :
- page, max, lines = channel.source.get_date_paged(date, count, page)
-
- else :
- lines = channel.source.get_date(date)
- max = None
-
- # render channel_date
- return _render_date (request, channel, date, lines, type, count, page, max)
-
-@preferences.handler(prefs.formatter, prefs.count)
-def channel_search (request, channel, formatter, count, q=None, page=1, max=1, type=None, t=None) :
- """
- Display the search form for the channel for GET, or do the search for POST.
- """
-
- # calculate skip offset from page/count
- skip = (page - 1) * count
-
- # got a search query?
- if q :
- # attribute targets
- targets = dict(('search_%s' % target, True) for target in t if target in ('msg', 'nick')) if t else {}
-
- try :
- # do search
- lines = log_search.get_index().search_simple(channel, q, count, skip, **targets)
-
- # update max?
- if max and page > max :
- max = page
-
- except log_search.NoResultsFound :
- # no results
- lines = None
-
- else :
- # just display the search form
- lines = None
-
- # type?
- if type and lines :
- # special type
- return _render_type(request, channel, lines, type, full_timestamps=True)
-
- else :
- # format lines to HTML if any
- if lines :
- # format
- lines = formatter.format_html(lines, full_timestamps=True)
-
- # render page
- return templates.render_to_response("channel_search",
- req = request,
- prefs = request.prefs,
- channel = channel,
- search_query = q,
- search_targets = t,
- count = count,
- page = page,
- skip = skip,
- max = max,
- lines = lines,
- )
-
--- a/helpers.py Sun Sep 13 00:49:55 2009 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,243 +0,0 @@
-"""
- Some additional helpers
-"""
-
-import datetime
-import calendar as _calendar
-
-import qmsk.web.helpers
-
-import preferences, urls, config, version
-
-class Helpers (qmsk.web.helpers.Helpers) :
- """
- Our set of helpers, inheriting from base helpers
- """
-
- # set contructor...
- set = set
-
- # reference to calendar instance
- calendar = _calendar.Calendar()
-
- # list of (month_num, month_name) for the months in the year
- months = list(enumerate(_calendar.month_name))[1:]
-
- def version_link (self) :
- """
- Returns a <a href> representing this version of the software
- """
-
- return version.version_link_hg(config.HGWEB_URL, config.HG_WC_PATH)
-
- def tz_name (self, tz) :
- """
- Returns a string describing the given timezone
- """
-
- return self.now().strftime(config.TIMEZONE_FMT)
-
- def fmt_month (self, date) :
- """
- Formats a month
- """
-
- return date.strftime(config.MONTH_FMT)
-
- def fmt_weekday (self, wday) :
- """
- Formats an abbreviated weekday name
- """
-
- return _calendar.day_abbr[wday]
-
- def build_date (self, month, mday) :
- """
- Returns a datetime.datetime for the given (month.year, month.month, mday)
- """
-
- return self.ctx['prefs'][preferences.timezone].localize(datetime.datetime(month.year, month.month, mday))
-
- def now (self) :
- """
- Build current time
- """
-
- return self.ctx['prefs'][preferences.timezone].localize(datetime.datetime.now())
-
- def today (self) :
- """
- Build today's date
- """
-
- return self.now().date()
-
- def is_today (self, dt) :
- """
- Checks if the given datetime.datetime is today
- """
-
- # compare with current date
- return dt.date() == self.today()
-
- def is_this_month (self, month) :
- """
- Checks the given month is the current month
- """
-
- today = self.today()
-
- return (month.year == today.year and month.month == today.month)
-
- @staticmethod
- def _wrap_year (year, month) :
- """
- Wraps month to between [1, 12], spilling overflow/underflow by to year.
-
- Returns (year, month)
- """
-
- # underflow?
- if month == 0 :
- # wrap to previous year
- return (year - 1, 12)
-
- # overflow?
- elif month == 13 :
- # wrap to next year
- return (year + 1, 1)
-
- # sane value
- elif 1 <= month <= 12 :
- return (year, month)
-
- # insane value
- else :
- assert False, "invalid year/month: %d/%d" % (year, month)
-
- def prev_month (self, month) :
- """
- Returns the month preceding the given one (as a datetime.datetime)
- """
-
- # previous month
- y, m = self._wrap_year(month.year, month.month - 1)
-
- # build datetime
- return datetime.datetime(year=y, month=m, day=1, tzinfo=month.tzinfo)
-
- def next_month (self, month) :
- """
- Returns the month following the given one (as a datetime.datetime)
- """
-
- # previous month
- y, m = self._wrap_year(month.year, month.month + 1)
-
- # build datetime
- return datetime.datetime(year=y, month=m, day=1, tzinfo=month.tzinfo)
-
- def fmt_time (self, time=None) :
- """
- Format given time, or current time
- """
-
- # defaults
- if not time :
- time = self.now()
-
- return time.strftime(self.ctx['prefs'][preferences.time_format])
-
- def fmt_date (self, date=None) :
- """
- Format given date, or current date
- """
-
- # defaults
- if not date :
- date = self.now()
-
- return date.strftime(self.ctx['prefs'][preferences.date_format])
-
- def url (self, url, **params) :
- """
- Build URL with our request object
- """
-
- return url.build(self.ctx['req'], **params)
-
- # old name
- build_url = url
-
- def utc_timestamp (self, dtz) :
- """
- Build an UTC timestamp from the given datetime
- """
-
- return urls.types['ts'].build(dtz)
-
- def skip_next (self, count, skip) :
- """
- Return skip offset for next page
- """
-
- return count + skip
-
- def skip_page (self, count, page) :
- """
- Skip to page
- """
-
- if page :
- return count * page
-
- else :
- return None
-
- def skip_prev (self, count, skip) :
- """
- Return skip offset for previous page, None for first page
- """
-
- if skip > count :
- return skip - count
-
- else :
- return None
-
- def max (self, *values) :
- """
- Returns the largest of the given values
- """
-
- return max(values)
-
- def select_options (self, key_values, selected_key=None) :
- """
- Render a series of <option> tags for <select>.
-
- The given key_values is an iterable of (key, value) pairs, key may be None if it's the same as value.
- """
-
- return '\n'.join(
- '\t<option%s%s>%s</option>' % (
- ' value="%s"' % key if key is not None else '',
- ' selected="selected"' if (key if key is not None else value) == selected_key else '',
- value
- ) for key, value in key_values
- )
-
- def prev_date (self, date) :
- """
- Returns the previous date for the given datetime-date
- """
-
- return datetime.datetime(date.year, date.month, date.day, tzinfo=date.tzinfo) - datetime.timedelta(days=1)
-
- def next_date (self, date) :
- """
- Returns the previous date for the given datetime-date
- """
-
- return datetime.datetime(date.year, date.month, date.day, tzinfo=date.tzinfo) + datetime.timedelta(days=1)
-
--- a/index.cgi Sun Sep 13 00:49:55 2009 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,47 +0,0 @@
-#!/usr/bin/python2.5
-
-"""
- CGI mode using qmsk.web.cgi
-"""
-
-def error () :
- """
- Dumps out a raw traceback of the current exception to stdout, call from except.
-
- Used for low-level ImportError's
- """
-
- # if this import fails, we're doomed
- import sys, error
-
- # format info
- status, content_type, body = error.build_error()
-
- # HTTP headers+body
- sys.stdout.write('Status: %s\r\n' % status)
- sys.stdout.write('Content-type: %s\r\n' % content_type)
- sys.stdout.write('\r\n')
- sys.stdout.write(body)
-
-def main () :
- """
- Build our wsgi.Application and run
- """
-
- try :
- from qmsk.web import cgi_main
- import wsgi
-
- # create app
- app = wsgi.Application()
-
- # run once
- cgi_main.run(app)
-
- except :
- # display error on stdout
- error()
-
-if __name__ == '__main__' :
- main()
-
--- a/index.fcgi Sun Sep 13 00:49:55 2009 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,26 +0,0 @@
-#!/usr/bin/python2.5
-# :set filetype=py
-
-"""
- FastCGI mode using qmsk.web.fastcgi_main
-"""
-
-from qmsk.web import fastcgi_main
-
-# XXX: error handling for imports? Lighttp sucks hard at this
-import wsgi
-
-def main () :
- """
- Build our WSGIApplication and run
- """
-
- # create app
- app = wsgi.Application()
-
- # run once
- fastcgi_main.run(app)
-
-if __name__ == '__main__' :
- main()
-
--- a/log_channel.py Sun Sep 13 00:49:55 2009 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,54 +0,0 @@
-"""
- A channel represents a series of log events, stored in some log source
-"""
-
-import log_search
-
-class LogChannel (object) :
- """
- A single IRC channel, logged to some specific place
- """
-
- def __init__ (self, id, network, name, source) :
- """
- Initialize this channel from the given identifier key, network name, channel name, and LogSource
- """
-
- # store
- self.id = id
- self.network = network
- self.name = name
- self.source = source
-
- # bind source
- self.source.bind_channel(self)
-
- @property
- def title (self) :
- """
- Title is 'Network - #channel'
- """
-
- return "%s - %s" % (self.network, self.name)
-
- def search (self, query) :
- """
- Perform a search on this channel, returning a sequence of LogLines
- """
-
- return log_search.index.search_simple(self, query)
-
- def __str__ (self) :
- """
- Returns self.title
- """
-
- return self.title
-
- def __repr__ (self) :
- """
- Uses self.id
- """
-
- return "LogChannel(%s)" % (self.id, )
-
--- a/log_formatter.py Sun Sep 13 00:49:55 2009 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,257 +0,0 @@
-"""
- Format LogLines into some other representation
-"""
-
-import re, xml.sax.saxutils
-
-from log_line import LogTypes
-from log_formatter_pil import PILImageFormatter
-from log_formatter_rss import RSSFormatter
-
-class LogFormatter (object) :
- """
- Provides a method to format series of LogLines into various output formats, with varying themes.
- """
-
- # machine-readable name
- name = None
-
- # human-readable name
- title = None
-
- ## parameters
- # use a fixed-width font for HTML output
- html_fixedwidth = True
-
- def __init__ (self, tz, timestamp_fmt, img_ttf_path, img_font_size) :
- """
- Initialize to format timestamps with the given timezone and timestamp.
-
- Use the given TTF font to render image text with the given size, if given, otherwise, a default one.
- """
-
- # store
- self.tz = tz
- self.timestamp_fmt = timestamp_fmt
- self.img_ttf_path = img_ttf_path
- self.img_font_size = img_font_size
-
- # XXX: harcoded
- self.date_fmt = '%Y-%m-%d'
-
- def _format_line_text (self, line, template_dict, type=None, full_timestamp=False, **extra) :
- """
- Format the given line as text, using the given { type: string template } dict.
-
- If type is given, then it overrides line.type
-
- Any additional keyword args will also be available for the template to use
- """
-
- # default type?
- if type is None :
- type = line.type
-
- # look up the template
- if type in template_dict :
- template = template_dict[type]
-
- else :
- raise Exception("Format template not defined for type: %s" % LogTypes.name_from_code(type))
-
- # convert timestamp into display timezone
- dtz = line.timestamp.astimezone(self.tz)
-
- # full timestamps?
- if full_timestamp :
- # XXX: let the user define a 'datetime' format instead?
- timestamp_fmt = self.date_fmt + ' ' + self.timestamp_fmt
-
- else :
- timestamp_fmt = self.timestamp_fmt
-
- # breakdown source
- source_nickname, source_username, source_hostname, source_chanflag = line.source
- target_nickname = line.target
-
- # format with dict
- return template % dict(
- channel_name = line.channel.name,
- datetime = dtz.strftime('%a %b %d %H:%M:%S %Y'),
- date = dtz.strftime(self.date_fmt),
- timestamp = dtz.strftime(timestamp_fmt),
- source_nickname = source_nickname,
- source_username = source_username,
- source_hostname = source_hostname,
- source_chanflag = source_chanflag,
- target_nickname = target_nickname,
- message = line.data,
- **extra
- )
-
- def format_txt (self, lines, full_timestamps=False) :
- """
- Format given lines as plaintext.
-
- If full_timestamps is given, the output will contain full timestamps with both date and time.
-
- No trailing newlines.
- """
-
- abstract
-
- def format_html (self, lines, full_timestamps=False) :
- """
- Format as HTML.
-
- See format_txt for information about arguments
- """
-
- abstract
-
- def format_png (self, lines, full_timestamps=False) :
- """
- Format as a PNG image, returning the binary PNG data
- """
-
- abstract
-
- def format_rss (self, lines, full_timestamps=False) :
- """
- Format as an XML RSS document
- """
-
- abstract
-
-class BaseHTMLFormatter (LogFormatter) :
- """
- Implements some HTML-formatting utils
- """
-
- # parameters
- html_fixedwidth = True
-
- # regexp to match URLs
- URL_REGEXP = re.compile(r"http://\S+")
-
- def _process_links (self, line) :
- """
- Processed the rendered line, adding in <a href>'s for things that look like URLs, returning the new line.
-
- The line should already be escaped
- """
-
- def _encode_url (match) :
- # encode URL
- url_html = match.group(0)
- url_link = xml.sax.saxutils.unescape(url_html)
-
- return '<a href="%(url_link)s">%(url_html)s</a>' % dict(url_link=url_link, url_html=url_html)
-
- return self.URL_REGEXP.sub(_encode_url, line)
-
- def format_html (self, lines, **kwargs) :
- """
- Just uses format_txt, but processes links, etc
- """
-
- # format using IrssiTextFormatter
- for line, txt in self.format_txt(lines, **kwargs) :
- # escape HTML
- html = xml.sax.saxutils.escape(txt)
-
- # process links
- html = self._process_links(html)
-
- # yield
- yield line, html
-
-
-class IrssiTextFormatter (RSSFormatter, PILImageFormatter, LogFormatter) :
- """
- Implements format_txt for irssi-style output
- """
-
- # format definitions by type
- __FMT = {
- LogTypes.RAW : "%(timestamp)s %(data)s",
- LogTypes.LOG_OPEN : "--- Log opened %(datetime)s",
- LogTypes.LOG_CLOSE : "--- Log closed %(datetime)s",
- 'DAY_CHANGED' : "--- Day changed %(date)s",
-
- LogTypes.MSG : "%(timestamp)s <%(source_chanflag)s%(source_nickname)s> %(message)s",
- LogTypes.NOTICE : "%(timestamp)s -%(source_nickname)s- %(message)s",
- LogTypes.ACTION : "%(timestamp)s * %(source_nickname)s %(message)s",
-
- LogTypes.JOIN : "%(timestamp)s -!- %(source_nickname)s [%(source_username)s@%(source_hostname)s] has joined %(channel_name)s",
- LogTypes.PART : "%(timestamp)s -!- %(source_nickname)s [%(source_username)s@%(source_hostname)s] has left %(channel_name)s [%(message)s]",
- LogTypes.KICK : "%(timestamp)s -!- %(target_nickname)s was kicked from %(channel_name)s by %(source_nickname)s [%(message)s]",
- LogTypes.MODE : "%(timestamp)s -!- mode/%(channel_name)s [%(message)s] by %(source_nickname)s",
-
- LogTypes.NICK : "%(timestamp)s -!- %(source_nickname)s is now known as %(target_nickname)s",
- LogTypes.QUIT : "%(timestamp)s -!- %(source_nickname)s [%(source_username)s@%(source_hostname)s] has quit [%(message)s]",
-
- LogTypes.TOPIC : "%(timestamp)s -!- %(source_nickname)s changed the topic of %(channel_name)s to: %(message)s",
- 'TOPIC_UNSET' : "%(timestamp)s -!- Topic unset by %(source_nickname)s on %(channel_name)s",
-
- LogTypes.SELF_NOTICE: "%(timestamp)s -%(source_nickname)s- %(message)s",
- LogTypes.SELF_NICK : "%(timestamp)s -!- %(source_nickname)s is now known as %(target_nickname)s",
-
- LogTypes.NETSPLIT_START :
- "%(timestamp)s -!- Netsplit %(source_hostname)s <-> %(target_nickname)s quits: %(_netsplit_targets)s",
- LogTypes.NETSPLIT_END :
- "%(timestamp)s -!- Netsplit over, joins: %(_netsplit_targets)s",
- }
-
- def format_txt (self, lines, full_timestamps=False) :
- # ...handle each line
- for line in lines :
- # extra args
- extra = {}
-
- # default to line.type
- type = line.type
-
- # special formatting for unset-Topic
- if line.type == LogTypes.TOPIC and line.data is None :
- type = 'TOPIC_UNSET'
-
- # format netsplit stuff
- elif line.type & LogTypes._NETSPLIT_MASK :
- # format the netsplit-targets stuff
- extra['_netsplit_targets'] = line.data
-
- # using __TYPES
- yield line, self._format_line_text(line, self.__FMT, type, full_timestamps, **extra)
-
-class IrssiFormatter (BaseHTMLFormatter, IrssiTextFormatter) :
- """
- Implements plain black-and-white irssi-style formatting
- """
-
- # name
- name = 'irssi'
- title = "Irssi (plain)"
-
-class DebugFormatter (BaseHTMLFormatter) :
- """
- Implements a raw debug-style formatting of LogLines
- """
-
- # name
- name = 'debug'
- title = "Raw debugging format"
-
- def format_txt (self, lines, full_timestamps=False) :
- # iterate
- for line in lines :
- # just dump
- yield line, unicode(line)
-
-def by_name (name) :
- """
- Lookup and return a class LogFormatter by name
- """
-
- return FORMATTERS[name]
-
--- a/log_formatter_pil.py Sun Sep 13 00:49:55 2009 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,81 +0,0 @@
-"""
- Use of PIL to render the image formatting stuff
-"""
-
-from PIL import Image, ImageDraw, ImageFont
-
-from cStringIO import StringIO
-
-class PILImageFormatter (object) :
- """
- Mixin for LogFormatter that implements the basic image-rendering operations on top of format_txt
- """
-
- # the font we load
- font = None
-
- # line spacing in pixels
- LINE_SPACING = 1
-
- def _load_font (self) :
- """
- Use the configured img_ttf_path for a TrueType font, or a default one
- """
-
- if self.font :
- pass
-
- elif self.img_ttf_path :
- # load truetype with configured size
- self.font = ImageFont.truetype(self.img_ttf_path, self.img_font_size)
-
- else :
- # default
- self.font = ImageFont.load_default()
-
- return self.font
-
- def format_png (self, lines, **kwargs) :
- """
- Build and return a PNG image of the given lines, using format_txt
- """
-
- # load font
- font = self._load_font()
-
- # build list of plain-text line data
- lines = list(data for line, data in self.format_txt(lines, **kwargs))
-
- # lines sizes
- line_sizes = [font.getsize(line) for line in lines]
-
- # figure out how wide/high the image will be
- width = max(width for width, height in line_sizes)
- height = sum(height + self.LINE_SPACING for width, height in line_sizes)
-
- # create new B/W image
- img = Image.new('L', (width, height), 0xff)
-
- # drawer
- draw = ImageDraw.Draw(img)
-
- # starting offset
- offset_y = 0
-
- # draw the lines
- for line, (width, height) in zip(lines, line_sizes) :
- # draw
- draw.text((0, offset_y), line, font=font)
-
- # next offset
- offset_y += height + self.LINE_SPACING
-
- # output buffer
- buf = StringIO()
-
- # save
- img.save(buf, 'png')
-
- # return data
- return buf.getvalue()
-
--- a/log_formatter_rss.py Sun Sep 13 00:49:55 2009 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,41 +0,0 @@
-"""
- Uses PyRSS2Gen to generate XML RSS documents
-"""
-
-import PyRSS2Gen as RSS2Gen
-import datetime, pytz
-
-class RSSFormatter (object) :
- """
- Mixin for LogFormatter that implements the basic RSS-rendering stuff on top of format_html
- """
-
- def format_rss (self, lines, **kwargs) :
- """
- Process using format_html
- """
-
- # build the RSS2 object and return the XML
- return RSS2Gen.RSS2(
- title = "IRC RSS feed",
- link = "http://irclogs.qmsk.net/",
- description = "A stupid RSS feed that nobody sane would ever use",
-
- # XXX: GMT
- lastBuildDate = datetime.datetime.utcnow(),
-
- items = [
- RSS2Gen.RSSItem(
- # use the formatted HTML data as the title
- title = html_data,
-
- # timestamp
- pubDate = line.timestamp.astimezone(pytz.utc),
-
- # link
- link = "http://xxx/",
-
- ) for line, html_data in self.format_html(lines, **kwargs)
- ]
- ).to_xml('utf8')
-
--- a/log_line.py Sun Sep 13 00:49:55 2009 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,186 +0,0 @@
-"""
- An IRC logfile consists of a series of lines/events
-"""
-
-class LogTypes :
- """
- Definitions of the various LogLines types:
-
- LogTypes.RAW
- LogTypes.LOG_OPEN
- LogTypes.LOG_CLOSE
-
- LogTypes.MSG
- LogTypes.NOTICE
- LogTypes.ACTION
-
- LogTypes.JOIN
- LogTypes.PART
- LogTypes.KICK
- LogTypes.MODE
-
- LogTypes.NICK
- LogTypes.QUIT
-
- LogTypes.TOPIC
-
- LogTypes.SELF_NOTICE
- LogTypes.SELF_NICK
- """
-
- # list of LogType values by name
- LIST = [
- ## special
- # unknown type, may or may not have a timestamp, no source, only data
- ('RAW', 0x01),
-
- # log opened
- ('LOG_OPEN', 0x02),
-
- # log closed
- ('LOG_CLOSE', 0x03),
-
- ## messages
- # <source> sent message <data> to <channel>
- ('MSG', 0x10),
-
- # <source> sent notice with message <data> to <channel>
- ('NOTICE', 0x11),
-
- # <source> sent CTCP action with message <data> to <channel>
- ('ACTION', 0x12),
-
- ## user-channel stats
- # <source> joined <channel>
- ('JOIN', 0x21),
-
- # <source> left <channel> with message <data>
- ('PART', 0x22),
-
- # <source> kicked <target> from <channel> with message <data>
- ('KICK', 0x25),
-
- # <source> changed modes on <channel> with modestring <data>
- ('MODE', 0x26),
-
- ## user status
- # <source> changed nickname to <target>
- ('NICK', 0x31),
-
- # <source> quit the network with quit-message <data>
- ('QUIT', 0x32),
-
- ## general channel status
- # <source> changed the topic of <channel> to <data>
- # data may be None if the topic was unset
- ('TOPIC', 0x41),
-
- ## our own actions
- # we (<source>) sent a notice with message <data> to <channel>
- ('SELF_NOTICE', 0x51),
-
- # we (<source>) changed nickname to <target>
- ('SELF_NICK', 0x52),
-
- ## slightly weirder bits
- # netsplit between <source_hostname> and <target_hostname>, <data> is a space-separated list of <chanflags><nickname>s affected
- # the last item in the list of nicknames may also be of the form "+<count>", where count is the number of additional, but hidden, nicknames affected
- ('NETSPLIT_START', 0x61),
-
- # netsplit over, <data> is a list of users affected, see NETSPLIT_START
- ('NETSPLIT_END', 0x062),
- ]
-
- @classmethod
- def name_from_code (cls, code) :
- """
- Looks up a LogType name by code
- """
-
- return dict((type, name) for name, type in cls.LIST)[code]
-
-# apply as attributes
-for name, code in LogTypes.LIST :
- setattr(LogTypes, name, code)
-
-# masks
-LogTypes._NETSPLIT_MASK = 0x60
-
-class LogLine (object) :
- """
- An event on some specific channel
- """
-
- # the LogChannel
- channel = None
-
- # the offset, only garunteed to be unique for a specific channel and date
- offset = None
-
- # the event type, as defiend in LogTypes
- type = None
-
- # the UTC timestamp of the event
- timestamp = None
-
- # the source, this should be a (nickname, username, hostname, chanflags) tuple
- source = None
-
- # possible target nickname for certain types (kick, nick)
- target = None
-
- # associated data (message, etc)
- data = None
-
- def __init__ (self, channel, offset, type, timestamp, source, target, data) :
- """
- Initialize with given values
- """
-
- self.channel = channel
- self.offset = offset
- self.type = type
- self.timestamp = timestamp
- self.source = source
- self.target = target
- self.data = data
-
- def format_type (self) :
- """
- Formats type as a string code
- """
-
- return LogTypes.name_from_code(self.type)
-
- def format_source (self) :
- """
- Formats source as [<chanflags>][<nickname>][!<username>][@<hostname>], omitting those parts that are missing.
-
- If all parts are None, this returns the empty string
- """
-
- nick, user, host, flags = self.source
-
- return "%s%s%s%s" % (
- flags if flags and flags != ' ' else '',
- nick if nick else '',
- '!' + user if user else '',
- '@' + host if host else ''
- )
-
- def __unicode__ (self) :
- return '\t'.join((
- self.channel.name,
- str(self.offset),
- self.format_type(),
- str(self.timestamp),
- self.format_source(),
- str(self.target),
- unicode(self.data)
- ))
-
- def __repr__ (self) :
- return "LogLine(%r, %s, %-12s, %s, %-35s, %-10s, %r)" % (
- self.channel, self.offset, self.format_type(), self.timestamp, self.format_source(), self.target, self.data
- )
-
--- a/log_parser.py Sun Sep 13 00:49:55 2009 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,233 +0,0 @@
-"""
- Parse log data into log_events
-"""
-
-import re
-import datetime
-
-from log_line import LogTypes, LogLine
-
-class LogParseError (Exception) :
- """
- Parsing some line failed
- """
-
- def __init__ (self, line, offset, message) :
- super(LogParseError, self).__init__("%r@%s: %s" % (line, offset, message))
-
-class LogParser (object) :
- """
- Abstract interface
- """
-
- def __init__ (self, tz, timestamp_fmt="%H:%M:%S") :
- """
- Setup the parser to use the given format for line timestamps, which are of the given timezone
- """
-
- self.tz = tz
- self.timestamp_fmt = timestamp_fmt
-
- def parse_lines (self, channel, lines, date=None, starting_offset=None) :
- """
- Parse the given (iterable) lines of unicode text into a LogEvent, no trailing newline.
-
- Channel is the LogChannel that these lines belong to.
-
- Offset is the starting offset, and may be None to not use it.
-
- Giving date lets the parser build full timestamps, otherwise, unless line timestamps have full date
- information, event timestamps will have a date component of 1900/1/1.
- """
-
- abstract
-
-class IrssiParser (LogParser) :
- """
- A parser for irssi logfiles
- """
-
- # timestamp prefix, with trailing space
- _TS = r'(?P<timestamp>[a-zA-Z0-9: ]+[a-zA-Z0-9])\s*'
-
- # subexpression parts
- _NICK = r'(?P<nickname>.+?)'
- _NICK2 = r'(?P<nickname2>.+?)'
- _TARGET = r'(?P<target>.+?)'
- _CHAN = r'(?P<channel>.+?)'
- _CHAN2 = r'(?P<channel2>.+?)'
- _USERHOST = r'(?P<username>.*?)@(?P<hostname>.*?)'
- _MSG = r'(?P<message>.*)'
- _SRV1 = r'(?P<server1>.+?)'
- _SRV2 = r'(?P<server2>.+?)'
-
- # regular expressions for matching lines, by type
- TYPE_EXPRS = (
- ( LogTypes.LOG_OPEN, r'--- Log opened (?P<datetime>.+)' ),
- ( LogTypes.LOG_CLOSE, r'--- Log closed (?P<datetime>.+)' ),
- ( LogTypes.MSG, _TS + r'<(?P<flags>.)' + _NICK + '> ' + _MSG ),
- ( LogTypes.NOTICE, _TS + r'-' + _NICK + ':' + _CHAN + '- ' + _MSG ),
- ( LogTypes.ACTION, _TS + r'\* ' + _NICK + ' ' + _MSG ),
- ( LogTypes.JOIN, _TS + r'-!- ' + _NICK + ' \[' + _USERHOST + '\] has joined ' + _CHAN ),
- ( LogTypes.PART, _TS + r'-!- ' + _NICK + ' \[' + _USERHOST + '\] has left ' + _CHAN + ' \[(?P<message>.*?)\]' ),
- ( LogTypes.KICK, _TS + r'-!- ' + _TARGET + ' was kicked from ' + _CHAN + ' by ' + _NICK + ' \[(?P<message>.*?)\]' ),
- # XXX: use hostname instead of nickname for ServerMode
- ( LogTypes.MODE, _TS + r'-!- (mode|ServerMode)/' + _CHAN + ' \[(?P<mode>.+?)\] by (?P<nickname>\S+)' ),
- ( LogTypes.NICK, _TS + r'-!- ' + _NICK + ' is now known as (?P<target>\S+)' ),
- ( LogTypes.QUIT, _TS + r'-!- ' + _NICK + ' \[' + _USERHOST + '\] has quit \[(?P<message>.*?)\]' ),
- ( LogTypes.TOPIC, _TS + r'-!- (' + _NICK + ' changed the topic of ' + _CHAN + ' to: (?P<topic>.*)|Topic unset by ' + _NICK2 + ' on ' + _CHAN2 + ')' ),
-
- ( LogTypes.SELF_NOTICE, _TS + r'\[notice\(' + _CHAN + '\)\] ' + _MSG ),
- ( LogTypes.SELF_NICK, _TS + r'-!- You\'re now known as (?P<target>\S+)' ),
-
- ( LogTypes.NETSPLIT_START, _TS + r'-!- Netsplit ' + _SRV1 + ' <-> ' + _SRV2 + ' quits: (?P<nick_list>[^(]+)( \(\+(?P<count>\d+) more,\S+\))?'),
- ( LogTypes.NETSPLIT_END, _TS + r'-!- Netsplit over, joins: (?P<nick_list>[^(]+)( \(\+(?P<count>\d+) more\))?' ),
-
- ( 'DAY_CHANGED', r'--- Day changed (?P<date>.+)' ),
- )
-
- # precompile
- TYPE_REGEXES = [(type, re.compile(expr)) for type, expr in TYPE_EXPRS]
-
- def parse_line (self, channel, line, date, offset=None) :
- """
- Parse a single line, and return the resulting LogLine, or None, to ignore the line.
-
- Uses self.TYPE_REGEXES to do the matching
- """
-
- # empty line
- if not line :
- return
-
- # look for match
- match = type = None
-
- # test each type
- for type, regex in self.TYPE_REGEXES :
- # attempt to match
- match = regex.match(line)
-
- # found, break
- if match :
- break
-
- # no match found?
- if not match :
- raise LogParseError(line, offset, "Line did not match any type")
-
- # match groups
- groups = match.groupdict(None)
-
- # parse timestamp
- if 'datetime' in groups :
- # parse datetime using default asctime() format
- dt = datetime.datetime.strptime(groups['datetime'], '%a %b %d %H:%M:%S %Y')
-
- elif 'timestamp' in groups :
- # parse timestamp into naive datetime
- dt = datetime.datetime.strptime(groups['timestamp'], self.timestamp_fmt)
-
- # override date?
- if date :
- dt = dt.replace(year=date.year, month=date.month, day=date.day)
-
- elif 'date' in groups :
- # parse date-only datetime
- dt = datetime.datetime.strptime(groups['date'], '%a %b %d %Y')
-
- else :
- # no timestamp !?
- raise LogParseError(line, offset, "No timestamp")
-
- # now localize with timezone
- dtz = self.tz.localize(dt)
-
- # channel, currently unused
- channel_name = (groups.get('channel') or groups.get('channel2'))
-
- # source
- if 'server1' in groups :
- source = (None, None, groups.get('server1'), None)
-
- else :
- source = (groups.get('nickname') or groups.get('nickname2'), groups.get('username'), groups.get('hostname'), groups.get('flags'))
-
- # target
- if 'server2' in groups :
- target = groups.get('server2')
-
- else :
- target = groups.get('target')
-
- # data
- if 'message' in groups :
- data = groups['message']
-
- elif 'mode' in groups :
- data = groups['mode']
-
- elif 'topic' in groups :
- data = groups['topic']
-
- elif 'nick_list' in groups :
- # split into components
- list = groups['nick_list'].split(', ')
-
- # additional count?
- if 'count' in groups and groups['count'] :
- list.append('+%d' % int(groups['count']))
-
- # join
- data = ' '.join(list)
-
- else :
- data = None
-
- # custom types?
- if type == 'DAY_CHANGED' :
- # new date
- date = dtz
-
- else :
- # build+return (date, LogLine)
- return date, LogLine(channel, offset, type, dtz, source, target, data)
-
- def parse_lines (self, channel, lines, date=None, starting_offset=None) :
- """
- Parse the given lines, yielding LogEvents.
- """
-
- for offset, line in enumerate(lines) :
- # offset?
- if starting_offset :
- offset = starting_offset + offset
-
- else :
- offset = None
-
- # try and parse
- try :
- # get None or (date, line)
- line_info = self.parse_line(channel, line, date, offset)
-
- # passthrough LogParseError's
- except LogParseError :
- raise
-
- # wrap other errors as LogParseError
- except Exception, e :
- raise LogParseError(line, offset, "Parsing line failed: %s" % e)
-
- else :
- # nothing?
- if not line_info :
- continue
-
- # unpack, update date
- date, line = line_info
-
- # yield
- yield line
-
-
--- a/log_search.py Sun Sep 13 00:49:55 2009 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,367 +0,0 @@
-"""
- Full-text searching of logs
-"""
-
-import datetime, calendar, pytz
-import os.path
-
-import HyperEstraier as hype
-
-import log_line, utils, config
-
-class LogSearchError (Exception) :
- """
- General search error
- """
-
- pass
-
-class SearchIndexError (LogSearchError) :
- """
- Error manipulating the index
- """
-
- def __init__ (self, msg, db) :
- """
- Build the error from the given message + HyperEstraier.Database
- """
-
- super(SearchIndexError, self).__init__("%s: %s" % (msg, db.err_msg(db.error())))
-
-class NoResultsFound (LogSearchError) :
- """
- No results found
- """
-
- pass
-
-class LogSearchIndex (object) :
- """
- An index on the logs for a group of channels.
-
- This uses Hyper Estraier to handle searching, whereby each log line is a document (yes, I have a powerful server).
-
- These log documents have the following attributes:
- @uri - channel/date/line
- channel - channel code
- type - the LogType id
- timestamp - UTC timestamp
- source_nickname - source nickname
- source_username - source username
- source_hostname - source hostname
- source_chanflags - source channel flags
- target_nickname - target nickname
-
- Each document then has a single line of data, which is the log data message
- """
-
- def __init__ (self, channels, path, mode='r') :
- """
- Open the database at the given path, with the given mode:
- First char:
- r - read, error if not exists
- w - write, create if not exists
- a - write, error if not exists
- c - create, error if exists
-
- Additional chars:
- trunc - truncate if exists
- + - read as well as write
- ? - non-blocking lock open, i.e. it fails if already open
-
- Channels is the ChannelList.
- """
-
- # store
- self.channels = channels
- self.path = path
- self.mode = mode
-
- # check it does not already exist?
- if mode in 'c' and os.path.exists(path) :
- raise LogSearchError("Index already exists: %s" % (path, ))
-
- # mapping of { mode -> flags }
- mode_to_flag = {
- 'r': hype.Database.DBREADER,
- 'w': hype.Database.DBWRITER | hype.Database.DBCREAT,
- 'a': hype.Database.DBWRITER,
- 'c': hype.Database.DBWRITER | hype.Database.DBCREAT,
- }
-
- # flags to use, standard modes
- flags = mode_to_flag[mode[0]]
-
- # mode-flags
- if '?' in mode :
- # non-blocking locking
- flags |= hype.Database.DBLCKNB
-
- elif '+' in mode :
- # read
- flags |= hype.Database.DBREADER
-
- elif 'trunc' in mode :
- # truncate. Dangerous!
- flags |= hype.Database.DBTRUNC
-
- # make instance
- self.db = hype.Database()
-
- # open
- if not self.db.open(path, flags) :
- raise SearchIndexError("Index open failed: %s, mode=%s, flags=%#06x" % (path, mode, flags), self.db)
-
- def close (self) :
- """
- Explicitly close the index, this is done automatically on del
- """
-
- if not self.db.close() :
- raise SearchIndexError("Index close failed", self.db)
-
- def insert (self, channel, lines) :
- """
- Adds a sequence of LogLines from the given LogChannel to the index, and return the number of added items
- """
-
- # count from zero
- count = 0
-
- # iterate
- for line in lines :
- # insert
- self.insert_line(channel, line)
-
- # count
- count += 1
-
- # return
- return count
-
- def insert_line (self, channel, line) :
- """
- Adds a single LogLine for the given LogChannel to the index
- """
-
- # validate the LogChannel
- assert channel.id
-
- # validate the LogLine
- assert line.offset
- assert line.timestamp
-
- # create new document
- doc = hype.Document()
-
- # line date
- date = line.timestamp.date()
-
- # ensure that it's not 1900
- assert date.year != 1900
-
- # add URI
- doc.add_attr('@uri', "%s/%s/%d" % (channel.id, date.strftime('%Y-%m-%d'), line.offset))
-
- # add channel id
- doc.add_attr('channel', channel.id)
-
- # add type
- doc.add_attr('type', str(line.type))
-
- # add UTC timestamp
- doc.add_attr('timestamp', str(utils.to_utc_timestamp(line.timestamp)))
-
- # add source attribute?
- if line.source :
- source_nickname, source_username, source_hostname, source_chanflags = line.source
-
- if source_nickname :
- doc.add_attr('source_nickname', source_nickname.encode('utf8'))
-
- if source_username :
- doc.add_attr('source_username', source_username.encode('utf8'))
-
- if source_hostname :
- doc.add_attr('source_hostname', source_hostname.encode('utf8'))
-
- if source_chanflags :
- doc.add_attr('source_chanflags', source_chanflags.encode('utf8'))
-
- # add target attributes?
- if line.target :
- target_nickname = line.target
-
- if target_nickname :
- doc.add_attr('target_nickname', target_nickname.encode('utf8'))
-
- # add data
- if line.data :
- doc.add_text(line.data.encode('utf8'))
-
- # put, "clean up dispensable regions of the overwritten document"
- if not self.db.put_doc(doc, hype.Database.PDCLEAN) :
- raise SearchIndexError("put_doc", self.db)
-
- def search_cond (self, cond) :
- """
- Search using a raw hype.Condition. Raises NoResultsFound if there aren't any results
- """
-
- # execute search, unused 'flags' arg stays zero
- results = self.db.search(cond, 0)
-
- # no results?
- if not results :
- raise NoResultsFound()
-
- # iterate over the document IDs
- for doc_id in results :
- # load document, this throws an exception...
- # option constants are hype.Database.GDNOATTR/GDNOTEXT
- doc = self.db.get_doc(doc_id, 0)
-
- # load the attributes/text
- channel = self.channels.lookup(doc.attr('channel'))
- type = int(doc.attr('type'))
- timestamp = utils.from_utc_timestamp(int(doc.attr('timestamp')))
-
- # source
- source = (doc.attr('source_nickname'), doc.attr('source_username'), doc.attr('source_hostname'), doc.attr('source_chanflags'))
-
- # target
- target = doc.attr('target_nickname')
-
- # message text
- message = doc.cat_texts().decode('utf8')
-
- # build+yield to as LogLine
- yield log_line.LogLine(channel, None, type, timestamp, source, target, message)
-
- def search (self, options=None, channel=None, attrs=None, phrase=None, order=None, max=None, skip=None) :
- """
- Search with flexible parameters
-
- options - bitmask of hype.Condition.*
- channel - LogChannel object
- attrs - raw attribute expressions
- phrase - the search query phrase
- order - order attribute expression
- max - number of results to return
- skip - number of results to skip
- """
-
- # build condition
- cond = hype.Condition()
-
- if options :
- # set options
- cond.set_options(options)
-
- if channel :
- # add channel attribute
- cond.add_attr(("channel STREQ %s" % channel.id).encode('utf8'))
-
- if attrs :
- # add attributes
- for attr in attrs :
- cond.add_attr(attr.encode('utf8'))
-
- if phrase :
- # add phrase
- cond.set_phrase(phrase.encode('utf8'))
-
- if order :
- # set order
- cond.set_order(order)
-
- if max :
- # set max
- cond.set_max(max)
-
- if skip :
- # set skip
- cond.set_skip(skip)
-
- # execute
- return self.search_cond(cond)
-
- def search_simple (self, channel, query, count=None, offset=None, search_msg=True, search_nick=False) :
- """
- Search for lines from the given channel for the given simple query.
-
- The search_* params define which attributes to search for (using fulltext search for the message, STROR for
- attributes).
- """
-
- # search attributes
- attrs = []
-
- # nickname target query
- if search_nick :
- attrs.append("source_nickname STRINC %s" % query)
-# attrs.append("target_nickname STRINC %s" % query)
-
- # use search(), backwards
- results = list(self.search(
- # simplified phrase
- options = hype.Condition.SIMPLE,
-
- # specific channel
- channel = channel,
-
- # given phrase
- phrase = query if search_msg else None,
-
- # attributes defined above
- attrs = attrs,
-
- # order by timestamp, descending (backwards)
- order = "timestamp NUMD",
-
- # count/offset
- max = count,
- skip = offset,
- ))
-
- # reverse
- return reversed(results)
-
- def list (self, channel, date, count=None, skip=None) :
- """
- List all indexed log items for the given UTC date
- """
-
- # start/end dates
- dt_start = datetime.datetime(date.year, date.month, date.day, 0, 0, 0, 0)
- dt_end = datetime.datetime(date.year, date.month, date.day, 23, 23, 59, 999999)
-
- # search
- return self.search(
- # specific channel
- channel = channel,
-
- # specific date range
- attrs = [
- "timestamp NUMBT %d %d" % (utils.to_utc_timestamp(dt_start), utils.to_utc_timestamp(dt_end))
- ],
-
- # order correctly
- order = "timestamp NUMA",
-
- # max count/offset
- max = count,
- skip = skip
- )
-
-def get_index () :
- """
- Returns the default read-only index, suitable for searching
- """
-
- # XXX: no caching, just open it every time
- _index = LogSearchIndex(config.LOG_CHANNELS, config.SEARCH_INDEX_PATH, 'r')
-
- # return
- return _index
-
--- a/log_source.py Sun Sep 13 00:49:55 2009 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,679 +0,0 @@
-"""
- A source of IRC log files
-"""
-
-import datetime, calendar, itertools, functools, math
-import os, os.path, errno
-import pytz
-
-import config, utils
-
-# a timedelta that represents one day
-ONE_DAY = datetime.timedelta(days=1)
-
-class LogSourceDecoder (object) :
- """
- Handles decoding of LogSource lines
- """
-
- def __init__ (self, encoding_list) :
- """
- Will try each of the given (charset, errors) items in turn, until one succeeds
- """
-
- self.encoding_list = encoding_list
-
- def decode (self, line) :
- """
- Decode the line of str() text into an unicode object
- """
-
- # list of errors encountered
- error_list = []
-
- # try each in turn
- for charset, errors in self.encoding_list :
- # trap UnicodeDecodeError to try with the next one
- try :
- return line.decode(charset, errors)
-
- except UnicodeDecodeError, e :
- error_list.append("%s:%s - %s" % (charset, errors, e))
- continue
-
- # failure
- raise UnicodeDecodeError("Failed to decode line: %r: %s" % (line, ', '.join(error_list)))
-
-class LogSource (object) :
- """
- A collection of IRC logs for a specific target in some format. Provides the possibility to read specific events
- """
-
- def __init__ (self, decoder, channel=None) :
- """
- The appropriate LogChannel must be given, as we need to be able to construct the LogLines. If it is not yet
- known, then it can be given as None, and set later with bind_channel.
-
- Uses the given LogSourceDecoder to decode the lines.
- """
-
- self.channel = channel
- self.decoder = decoder
-
- def bind_channel (self, channel) :
- """
- Set this source's channel, where None was set before
- """
-
- assert not self.channel
-
- self.channel = channel
-
- def get_latest (self, count) :
- """
- Yield the latest events, up to `count` of them.
- """
-
- abstract
-
- def get_date (self, dt) :
- """
- Get logs for the given date (as a datetime).
- """
-
- abstract
-
- def get_date_paged (self, dt, count, page=None) :
- """
- Get the logs for a given date (as a datetime), divided into pages of count each. If page is given, the time
- portion of the dt is ignored, and the lines for the given page are returned. Otherwise, if page is None,
- then the lines for the page containing the given timestamp is returned.
-
- The return value is a (page, max, lines) tuple.
- """
-
- # how to act?
- if page :
- # constant skip
- skip = (page - 1) * count
-
- else :
- skip = None
-
- # go through the logs a page at a time
- this_page = 1
-
- # last line's timestamp
- last_ts = None
-
- # found it yet?
- found = False
-
- # count the full number of lines
- line_count = 0
-
- # collect lines
- lines = []
-
- # iterate using get_date
- for line in self.get_date(dt) :
- # count them
- line_count += 1
-
- # skip?
- if skip :
- skip -= 1
- continue
-
- # is this page all that we want/need?
- if page or found :
- # already full?
- if len(lines) >= count :
- continue
-
- # specfic timestamp
- else :
- # didn't find it in this page?
- if len(lines) >= count :
- # reset to next page
- lines = []
- this_page += 1
-
- # is dt between these two timestamps?
- if (not last_ts or last_ts <= dt) and (dt <= line.timestamp) :
- # found!
- found = True
- page = this_page
-
- else :
- # keep looking
- last_ts = line.timestamp
-
- # store line
- lines.append(line)
-
- # calculate max_pages
- max_pages = math.ceil(float(line_count) / count)
-
- # return
- return (page, max_pages, lines)
-
- def get_month_days (self, dt) :
- """
- Return an ordered sequence of dates, telling which days in the given month (as a datetime) have logs available.
- """
-
- abstract
-
- def get_modified (self, dt=None, after=None, until=None) :
- """
- Returns a sequence of LogLines that may have been *modified* from their old values since the given datetime.
-
- If the datetime is not given, *all* lines are returned.
-
- If after is given, only lines from said date onwards will be returned, regardless of modification.
- If until is given, only lines up to and including said date will be returned, regardless of modification.
-
- The LogLines should be in time order.
- """
-
- abstract
-
- def get_prev_date (self, dt) :
- """
- Get the next distinct date of logs available preceeding the given date, or None
- """
-
- abstract
-
- def get_next_date (self, dt) :
- """
- Get the next distinct date of logs following the given date, or None.
- """
-
- abstract
-
-class LogFile (object) :
- """
- A file containing LogEvents
-
- XXX: modify to implement LogSource?
- """
-
- def __init__ (self, path, parser, decoder, channel=None, start_date=None, sep='\n') :
- """
- Open the file at the given path, which contains lines as separated by the given separator. Lines are
- decoded using the given LogSourceDecoder, and then parsed using the given parser, using the given date
- as the initial date for this log's first line.
-
- XXX: currently we assume start_date also for the end of the file
- """
-
- # store
- self.channel = channel
- self.path = path
- self.parser = parser
- self.start_date = start_date
- self.decoder = decoder
- self.sep = sep
-
- # open
- self.file = open(path, 'rb')
-
- def __iter__ (self) :
- """
- Yields a series of unicode lines, as read from the top of the file
- """
-
- # seek to beginning
- self.file.seek(0)
-
- # iterate over lines, decoding them as well
- return (self.decoder.decode(line.rstrip(self.sep)) for line in self.file)
-
- def read_full (self) :
- """
- Reads all LogLines. The LogLines will have a valid offset.
- """
-
- # just use our __iter__
- return self.parser.parse_lines(self.channel, self, self.start_date, starting_offset=1)
-
- def read_from (self, dt) :
- """
- Reads all LogLines from the given naive timestamp onwards
- """
-
- # start reading at beginning
- events = self.read_full()
-
- # skip unwanted events
- for event in events :
- if event.timestamp < dt :
- continue
-
- else :
- # include this line as well
- yield event
- break
-
- # yield the rest as-is
- for event in events :
- yield event
-
- def read_until (self, dt) :
- """
- Reads all LogLines up until the given naive timestamp
- """
-
- # start reading events at the beginning
- events = self.read_full()
-
- # yield events until we hit the given timestamp
- for event in events :
- if event.timestamp <= dt :
- yield event
-
- else :
- break
-
- # ignore the rest
- return
-
- def _read_blocks_reverse (self, blocksize=1024) :
- """
- Yields blocks of file data in reverse order, starting at the end of the file
- """
-
- # seek to end of file
- self.file.seek(0, os.SEEK_END)
-
- # read offset
- # XXX: hack -1 to get rid of trailing newline
- size = offset = self.file.tell() - 1
-
- # do not try to read past the beginning of the file
- while offset > 0:
- # calc new offset + size
- if offset > blocksize :
- # full block
- offset -= blocksize
- read_size = blocksize
-
- else :
- # partial block
- read_size = offset
- offset = 0
-
- # seek to offset
- self.file.seek(offset)
-
- # read the data we want
- block = self.file.read(read_size)
-
- # sanity check
- assert len(block) == read_size
-
- # yield
- yield block
-
- def _read_lines_reverse (self) :
- """
- Yields decoded lines from the end of the file, in reverse order.
- """
-
- # partial lines
- buf = ''
-
- # read from end of file, a block at a time
- for block in self._read_blocks_reverse() :
- # add in our previous buf
- buf = block + buf
-
- # split up lines
- lines = buf.split(self.sep)
-
- # keep the first one as our buffer, as it's incomplete
- buf = lines[0]
-
- # yield the rest a line at a time in reverse order... this looks weird, but that's how slicing works :)
- # XXX: use something like islice, this has to build a slice object
- for line in lines[:0:-1] :
- yield self.decoder.decode(line)
-
- def read_latest (self, count) :
- """
- Returns up to count events, from the end of the file, or less, if the file doesn't contain that many lines.
- """
-
- # the list of lines
- lines = []
-
- # start reading lines into lines
- for line in self._read_lines_reverse() :
- # append
- lines.append(line)
-
- # done?
- if len(lines) >= count :
- break
-
- # decode in reverse order, using our starting date....
- # XXX: use lines[::-1] or reversed?
- # XXX: it may make more sense to parse in reverse order, using 'self.end_date' or something like that
- return self.parser.parse_lines(self.channel, reversed(lines), self.start_date)
-
-class LogDirectory (LogSource) :
- """
- A directory containing a series of timestamped LogFiles
- """
-
- def __init__ (self, path, tz, parser, decoder, filename_fmt, channel=None) :
- """
- Load the logfiles at the given path, which are for the given LogChannel
-
- Decode the file lines using the given decoder, the files are named according the the date in the given
- timezone and date format, and will be parsed using the given parser.
- """
-
- # store
- self.channel = channel
- self.path = path
- self.tz = tz
- self.parser = parser
- self.decoder = decoder
- self.filename_fmt = filename_fmt
-
- def _get_logfile_date (self, d, load=True, mtime=False, ignore_missing=False) :
- """
- Get the logfile corresponding to the given naive date in our timezone.
-
- If load is False, only test for the presence of the logfile, do not actually open it. If mtime is given,
- then this returns the file's mtime
-
- Returns None if the logfile does not exist, unless ignore_missing is given as False.
- """
-
- # format filename
- filename = d.strftime(self.filename_fmt)
-
- # build path
- path = os.path.join(self.path, filename)
-
- try :
- if load :
- # open+return the LogFile
- return LogFile(path, self.parser, self.decoder, start_date=d, channel=self.channel)
-
- elif mtime :
- # stat
- return utils.mtime(path)
-
- else :
- # test
- return os.path.exists(path)
-
- # XXX: move to LogFile
- except IOError, e :
- # return None for missing files
- if e.errno == errno.ENOENT and ignore_missing :
- return None
-
- else :
- raise
-
- def _iter_logfile_dates (self, after=None, until=None, reverse=False) :
- """
- Yields a series of naive datetime objects representing the logfiles that are available, in time order.
-
- Parameters :
- after only dates from said date onwards will be returned
- until only dates up to and including said date will be returned
- reverse the dates are returned in reverse order instead. Note that the meaning of after/until doesn't change
- """
-
- # convert timestamps to our timezone's dates
- if after :
- after = after.astimezone(self.tz).date()
-
- if until :
- until = until.astimezone(self.tz).date()
-
- # listdir
- filenames = os.listdir(self.path)
-
- # sort
- filenames.sort(reverse=reverse)
-
- # iter files
- for filename in filenames :
- try :
- # parse date
- dt = self.tz.localize(datetime.datetime.strptime(filename, self.filename_fmt))
- date = dt.date()
-
- except :
- # ignore
- continue
-
- else :
- if (after and date < after) or (until and date > until) :
- # ignore
- continue
-
- else :
- # yield
- yield dt
-
- def _iter_date_reverse (self, dt=None) :
- """
- Yields an infinite series of naive date objects in our timezone, iterating backwards in time starting at the
- given *datetime*, or the the current date, if none given
- """
-
- # default to now
- if not dt :
- dtz = self.tz.localize(datetime.datetime.now())
-
- else :
- # convert to target timezone
- dtz = dt.astimezone(self.tz)
-
- # iterate unto infinity
- while True :
- # yield
- yield dtz.date()
-
- # one day sdrawkcab
- dtz -= ONE_DAY
-
- def _iter_logfile_reverse (self, dt=None, max_files=100) :
- """
- Yields a series of LogFile objects, iterating backwards in time starting at the given datetime, or the
- current date, if none given.
-
- Reads/probes at most max_files files.
- """
-
- # start counting at zero...
- file_count = 0
-
- # have we found any files at all so far?
- have_found = False
-
- # iterate backwards over days
- for day in self._iter_date_reverse(dt) :
- # stop if we've handled enough files by now
- if file_count > max_files :
- break
-
- # try and open the next logfile
- logfile = None
-
- file_count += 1
- logfile = self._get_logfile_date(day, ignore_missing=True)
-
- # no logfile there?
- if not logfile :
- # hit our limit?
- if file_count > max_files :
- # if we didn't find any logfiles at all, terminate rudely
- if not have_found :
- raise Exception("No recent logfiles found")
-
- else :
- # stop looking, deal with what we've got
- return
-
- else :
- # skip to next day
- continue
-
- # mark have_found
- have_found = True
-
- # yield it
- yield logfile
-
- def get_latest (self, count) :
- """
- Uses _logfile_reverse to read the yield the given lines from as many logfiles as needed
- """
-
- # read the events into here
- lines = []
-
- # start reading in those logfiles
- for logfile in self._iter_logfile_reverse() :
- # read the events
- # XXX: use a queue
- lines = list(logfile.read_latest(count)) + lines
-
- # done?
- if len(lines) >= count :
- break
-
- # return the events
- return lines
-
- def get_date (self, dt) :
- """
- A 'day' is considered to be a 24-hour period from 00:00:00 23:59:59. If the timezone of the given datetime
- differs from our native datetime, this may involve lines from more than one logfile.
- """
-
- # begin/end of 24h period, in target timezone
- dtz_begin = dt.replace(hour=0, minute=0, second=0).astimezone(self.tz)
- dtz_end = dt.replace(hour=23, minute=59, second=59, microsecond=999999).astimezone(self.tz)
-
- # as dates
- d_begin = dtz_begin.date()
- d_end = dtz_end.date()
-
-# print
-# print "LogDirectory.get_date - %s" % dt
-# print "\t %s %s" % (d_begin, dtz_begin)
-# print "\t-> %s %s" % (d_end, dtz_end)
-
- # if they're the same, just pull the full log for that date
- if d_begin == d_end :
- # open that log
- logfile = self._get_logfile_date(d_begin)
-
- # return the full data
- return logfile.read_full()
-
- # otherwise, we need to pull two partial logs
- else :
- # open both of them, but it's okay if we don't have the second one
- f_begin = self._get_logfile_date(d_begin)
- f_end = self._get_logfile_date(d_end, ignore_missing=True)
-
- # chain together the two sources
- return itertools.chain(
- f_begin.read_from(dtz_begin),
- f_end.read_until(dtz_end) if f_end else []
- )
-
- def _iter_month_days (self, month) :
- """
- Iterates over the days of a month as dt objects with time=0
- """
-
- # there's at most 31 days in a month...
- for day in xrange(1, 32) :
- try :
- # try and build the datetime
- dt = datetime.datetime(month.year, month.month, day)
-
- except :
- # stop
- return
-
- else :
- # fix timezones + yield
- yield month.tzinfo.localize(dt)
-
- def get_month_days (self, month) :
- """
- Returns a set of dates for which logfiles are available in the given datetime's month
- """
-
- # iterate over month's days
- for dt in self._iter_month_days(month) :
- # date in our target timezone
- log_date = dt.astimezone(self.tz).date()
-
- # test for it
- if self._get_logfile_date(log_date, load=False, ignore_missing=True) :
- # valid
- yield dt.date()
-
- def get_modified (self, dt=None, after=None, until=None) :
- """
- Returns the contents off all logfiles with mtimes past the given date
- """
-
- # iterate through all available logfiles in date order, as datetimes, from the given date on
- for log_date in self._iter_logfile_dates(after, until) :
- # compare against dt?
- if dt :
- # stat
- mtime = self._get_logfile_date(log_date, load=False, mtime=True, ignore_missing=True)
-
- # not modified?
- if mtime < dt :
- # skip
- continue
-
- # open
- logfile = self._get_logfile_date(log_date)
-
- # yield all lines
- for line in logfile.read_full() :
- yield line
-
- def get_prev_date (self, dt) :
- """
- Just use _iter_logfile_dates
- """
-
- # use for to "iter" once
- for log_date in self._iter_logfile_dates(until=dt - ONE_DAY, reverse=True) :
- return log_date
-
- else :
- return None
-
- def get_next_date (self, dt) :
- """
- Just use _iter_logfile_dates
- """
-
- # use for to "iter" once
- for log_date in self._iter_logfile_dates(after=dt + ONE_DAY) :
- return log_date
-
- else :
- return None
-
--- a/preferences.py Sun Sep 13 00:49:55 2009 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,534 +0,0 @@
-"""
- Handling user preferences
-"""
-
-import functools
-import Cookie
-
-from qmsk.web import urltree
-import utils
-
-class Preference (urltree.URLType) :
- """
- A specific preference
- """
-
- # the name to use
- name = None
-
- # the default value, as from parse()
- default = None
-
- def is_default (self, value) :
- """
- Returns True if the given post-value is the default value for this preference.
-
- Defaults to just compare against self.default
- """
-
- return (value == self.default)
-
- def process (self, preferences, value) :
- """
- Post-process this preference value. This can access the post-processed values of all other preferences that
- were defined before this one in the list given to Preferences.
-
- Defaults to just return value.
- """
-
- return value
-
-class RequestPreferences (object) :
- """
- Represents the specific preferences for some request
- """
-
- def __init__ (self, preferences, request, value_map=None) :
- """
- Initialize with the given Preferences object, http Request, and { key: value } mapping of raw preference values.
-
- This will build a mapping of { name: pre-value } using Preference.parse/Preference.default, and then
- post-process them into the final { name: value } mapping using Preference.process, in strict pref_list
- order. Note that the process() method will only have access to those preferences processed before it was.
- """
-
- # store
- self.preferences = preferences
- self.request = request
-
- # initialize
- self.values = {}
- self.set_cookies = {}
-
- # initial value map
- pre_values = {}
-
- # load preferences
- for pref in preferences.pref_list :
- # got a value for it?
- if value_map and pref.name in value_map :
- # get value
- value = value_map[pref.name]
-
- # parse it
- value = pref.parse(value)
-
- else :
- # use default value
- value = pref.default
-
- # add
- pre_values[pref.name] = value
-
- # then post-process using Preferences.process(), in strict pref_list order
- for pref in preferences.pref_list :
- # store into self.values, so that pref.get(...) will be able to access the still-incomplete self.values
- # dict
- self.values[pref.name] = pref.process(self, pre_values[pref.name])
-
- def _get_name (self, pref) :
- """
- Look up a Preference's name, either by class, object or name.
- """
-
- # Preference -> name
- if isinstance(pref, Preference) :
- pref = pref.name
-
- return pref
-
- def pref (self, name) :
- """
- Look up a Preference by object, name
- """
-
- # Preference
- if isinstance(name, Preference) :
- return name
-
- # Preference.name
- elif isinstance(name, basestring) :
- return self.preferences.pref_map[name]
-
- # XXX: class?
- else :
- assert False
-
- def get (self, pref) :
- """
- Return the value for the given Preference, or preference name
- """
-
- # look up
- return self.values[self._get_name(pref)]
-
- # support dict-access
- __getitem__ = get
-
- def is_default (self, pref) :
- """
- Returns True if the given preference is at its default value
- """
-
- # determine using Preference.is_default
- return self.pref(pref).is_default(self.get(pref))
-
- def build (self, pref) :
- """
- Like 'get', but return the raw cookie value
- """
-
- # the Preference
- pref = self.pref(pref)
-
- # build
- return pref.build(self.get(pref))
-
- def parse (self, pref, value=None) :
- """
- Parse+process the raw value for some pref into a value object.
-
- Is the given raw value is None, this uses Preference.default
- """
-
- # lookup pref
- pref = self.pref(pref)
-
- # build value
- if value is not None :
- # parse
- value = pref.parse(value)
-
- else :
- # default
- value = pref.default
-
- # post-process
- value = pref.process(self, value)
-
- # return
- return value
-
- def set (self, name, value_obj=None) :
- """
- Set a new value for the given preference (by str name).
-
- If value_obj is None, then the preference cookie is unset
- """
-
- # sanity-check to make sure we're not setting it twice...
- assert name not in self.set_cookies
-
- # None?
- if value_obj is not None :
- # encode using the Preference object
- value_str = self.preferences.pref_map[name].build(value_obj)
-
- else :
- # unset as None
- value_str = None
-
- # update in our dict
- self.values[name] = value_obj
-
- # add to set_cookies
- self.set_cookies[name] = value_str
-
-class Preferences (object) :
- """
- Handle user preferences using cookies
- """
-
- def __init__ (self, pref_list) :
- """
- Use the given list of Preference objects.
-
- The ordering of the given pref_list is significant for the process() implementation, as the
- Preferences are process()'d in order.
- """
-
- # store
- self.pref_list = pref_list
-
- # translate to mapping as well
- self.pref_map = dict((pref.name, pref) for pref in pref_list)
-
- def load (self, request, ) :
- """
- Load the set of preferences for the given request, and return as a { name -> value } dict
- """
-
- # the dict of values
- values = {}
-
- # load the cookies
- cookie_data = request.env.get('HTTP_COOKIE')
-
- # got any?
- if cookie_data :
- # parse into a SimpleCookie
- cookies = Cookie.SimpleCookie(cookie_data)
-
- # update the the values
- values.update((morsel.key, morsel.value) for morsel in cookies.itervalues())
-
- else :
- cookies = None
-
- # apply any query parameters
- for pref in self.pref_list :
- # look for a query param
- value = request.get_arg(pref.name)
-
- if value :
- # override
- values[pref.name] = value
-
- # build the RequestPreferences object
- return cookies, RequestPreferences(self, request, values)
-
- def handler (self, *pref_list) :
- """
- Intended to be used as a decorator for a request handler, this will load the give Preferences and pass
- them to the wrapped handler as keyword arguments, in addition to any others given.
- """
-
- def _decorator (func) :
- @functools.wraps(func)
- def _handler (request, **args) :
- # load preferences
- cookies, prefs = self.load(request)
-
- # bind to request.prefs
- # XXX: better way to do this? :/
- request.prefs = prefs
-
- # update args with new ones
- args.update(((pref.name, prefs.get(pref)) for pref in pref_list))
-
- # handle to get response
- response = func(request, **args)
-
- # set cookies?
- if prefs.set_cookies :
- # default, empty, cookiejar
- if not cookies :
- cookies = Cookie.SimpleCookie('')
-
- # update cookies
- for key, value in prefs.set_cookies.iteritems() :
- if value is None :
- assert False, "Not implemented yet..."
-
- else :
- # set
- cookies[key] = value
- cookies[key]["path"] = config.PREF_COOKIE_PATH
- cookies[key]["expires"] = config.PREF_COOKIE_EXPIRE_SECONDS
-
- # add headers
- for morsel in cookies.itervalues() :
- response.add_header('Set-cookie', morsel.OutputString())
-
- return response
-
- # return wrapped handler
- return _handler
-
- # return decorator...
- return _decorator
-
-# now for our defined preferences....
-import pytz
-import config
-
-class TimeFormat (urltree.URLStringType, Preference) :
- """
- Time format
- """
-
- # set name
- name = 'time_format'
-
- # default value
- default = config.PREF_TIME_FMT_DEFAULT
-
-class DateFormat (urltree.URLStringType, Preference) :
- """
- Date format
- """
-
- # set name
- name = 'date_format'
-
- # default value
- default = config.PREF_DATE_FMT_DEFAULT
-
-class TimezoneOffset (Preference) :
- """
- If the DST-aware 'timezone' is missing, we can fallback to a fixed-offset timezone as detected by
- Javascript.
-
- This is read-only, and None by default
- """
-
- name = 'timezone_offset'
- default = None
-
- def parse (self, offset) :
- """
- Offset in minutes -> said minutes
- """
-
- return int(offset)
-
-class Timezone (Preference) :
- """
- Timezone
- """
-
- # set name
- name = 'timezone'
-
- # default is handled via process()
- default = 'auto'
-
- # the list of available (value, name) options for use with helpers.select_options
- OPTIONS = [('auto', "Autodetect")] + [(None, tz_name) for tz_name in pytz.common_timezones]
-
- def parse (self, name) :
- """
- default -> default
- tz_name -> pytz.timezone
- """
-
- # special-case for 'auto'
- if name == self.default :
- return self.default
-
- else :
- return pytz.timezone(name)
-
- def is_default (self, tz) :
- """
- True if it's a FixedOffsetTimezone or PREF_TIMEZONE_FALLBACK
- """
-
- return (isinstance(tz, utils.FixedOffsetTimezone) or tz == config.PREF_TIMEZONE_FALLBACK)
-
- def build (self, tz) :
- """
- FixedOffsetTimezone -> None
- pytz.timezone -> tz_name
- """
-
- # special-case for auto/no explicit timezone
- if self.is_default(tz) :
- return self.default
-
- else :
- # pytz.timezone zone name
- return tz.zone
-
- def process (self, prefs, tz) :
- """
- If this timezone is given, simply build that. Otherwise, try and use TimezoneOffset, and if that fails,
- just return the default.
-
- None -> FixedOffsetTimezone/PREF_TIMEZONE_FALLBACK
- pytz.timezone -> pytz.timezone
- """
-
- # specific timezone set?
- if tz != self.default :
- return tz
-
- # fixed offset?
- elif prefs[timezone_offset] is not None :
- return utils.FixedOffsetTimezone(prefs[timezone_offset])
-
- # default
- else :
- return config.PREF_TIMEZONE_FALLBACK
-
-class ImageFont (Preference) :
- """
- Font for ImageFormatter
- """
-
- # set name
- name = 'image_font'
-
- def __init__ (self, font_dict, default_name) :
- """
- Use the given { name: (path, title) } dict and default the given name
- """
-
- self.font_dict = font_dict
- self.default = self.parse(default_name)
-
- def parse (self, name) :
- """
- name -> (name, path, title)
- """
-
- path, title = self.font_dict[name]
-
- return name, path, title
-
- def build (self, font_info) :
- """
- (name, path, title) -> name
- """
-
- name, path, title = font_info
-
- return name
-
-class ImageFontSize (urltree.URLIntegerType, Preference) :
- # set name, default
- name = 'image_font_size'
- default = config.PREF_IMAGE_FONT_SIZE_DEFAULT
-
- # XXX: constraints for valid values
-
-class Formatter (Preference) :
- """
- LogFormatter to use
- """
-
- # set name
- name = 'formatter'
-
- def __init__ (self, formatters, default) :
- """
- Use the given { name -> class LogFormatter } dict and default (a LogFormatter class)
- """
-
- self.formatters = formatters
- self.default = default
-
- def parse (self, fmt_name) :
- """
- fmt_name -> class LogFormatter
- """
-
- return self.formatters[fmt_name]
-
- def build (self, fmt_cls) :
- """
- class LogFormatter -> fmt_name
- """
-
- return fmt_cls.name
-
- def process (self, prefs, fmt_cls) :
- """
- class LogFormatter -> LogFormatter(tz, time_fmt, image_font.path)
- """
-
- # time stuff
- tz = prefs[timezone]
- time_fmt = prefs[time_format]
-
- # font stuff
- font_name, font_path, font_title = prefs[image_font]
- font_size = prefs[image_font_size]
-
- return fmt_cls(tz, time_fmt, font_path, font_size)
-
-class Count (urltree.URLIntegerType, Preference) :
- """
- Number of lines of log data to display per page
- """
-
- # set name
- name = "count"
-
- # default
- default = config.PREF_COUNT_DEFAULT
-
- def __init__ (self) :
- super(Count, self).__init__(allow_negative=False, allow_zero=False, max=config.PREF_COUNT_MAX)
-
-# and then build the Preferences object
-time_format = TimeFormat()
-date_format = DateFormat()
-timezone_offset = TimezoneOffset()
-timezone = Timezone()
-image_font = ImageFont(config.FORMATTER_IMAGE_FONTS, config.PREF_IMAGE_FONT_DEFAULT)
-image_font_size = ImageFontSize()
-formatter = Formatter(config.LOG_FORMATTERS, config.PREF_FORMATTER_DEFAULT)
-count = Count()
-
-preferences = Preferences([
- time_format,
- date_format,
- timezone_offset,
- timezone,
- image_font,
- image_font_size,
- formatter,
- count,
-])
-
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/qmsk/irclogs/__init__.py Sun Sep 13 01:15:56 2009 +0300
@@ -0,0 +1,10 @@
+"""
+ The irclogs.qmsk.net site is an IRC log browser
+"""
+
+# the URL mapper
+import urls
+
+# our RequestHandler
+handler = urls.mapper
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/qmsk/irclogs/channels.py Sun Sep 13 01:15:56 2009 +0300
@@ -0,0 +1,38 @@
+"""
+ Our list of LogChannels
+"""
+
+class ChannelList (object) :
+ """
+ The list of channels, and related methods
+ """
+
+
+ def __init__ (self, channel_list) :
+ """
+ Initialize with the given channel dict
+ """
+
+ self.channel_list = channel_list
+ self.channel_dict = dict((channel.id, channel) for channel in channel_list)
+
+ def lookup (self, channel_name) :
+ """
+ Looks up the LogChannel for the given name
+ """
+
+ return self.channel_dict[channel_name]
+
+ def dict (self) :
+ """
+ Returns a { name: LogChannel } dict
+ """
+ return self.channel_dict
+
+ def __iter__ (self) :
+ """
+ Iterate over our defined LogChannel objects
+ """
+
+ return iter(self.channel_list)
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/qmsk/irclogs/config.py Sun Sep 13 01:15:56 2009 +0300
@@ -0,0 +1,112 @@
+"""
+    Configurable defaults
+"""
+
+import os.path, pytz
+from log_parser import IrssiParser
+from log_channel import LogChannel
+from log_source import LogSourceDecoder, LogDirectory
+from log_formatter import IrssiFormatter, DebugFormatter
+from channels import ChannelList
+import log_formatter
+
+# build relative paths to the dir containing this file
+relpath = lambda path : os.path.join(os.path.dirname(__file__), path)
+
+### ###
+### Configuration ###
+### ###
+
+# timezone to use for logs
+LOG_TIMEZONE = pytz.timezone('Europe/Helsinki')
+
+# timestamp format for logfiles
+LOG_TIMESTAMP_FMT = '%H:%M:%S'
+
+# the decoder used for logfiles
+LOG_DECODER = LogSourceDecoder((
+ ('utf-8', 'strict'),
+ ('latin-1', 'replace'),
+))
+
+# log filename format
+LOG_FILENAME_FMT = '%Y-%m-%d'
+
+# the log parser that we use
+LOG_PARSER = IrssiParser(LOG_TIMEZONE, LOG_TIMESTAMP_FMT)
+#LOG_PARSER_FULLTS = IrssiParser(LOG_TIMEZONE, '%Y%m%d%H%M%S')
+
+# the statically defined channel list
+LOG_CHANNELS = ChannelList([
+ LogChannel('tycoon', "OFTC", "#tycoon",
+ LogDirectory(relpath('/home/spbot/irclogs/tycoon'), LOG_TIMEZONE, LOG_PARSER, LOG_DECODER, LOG_FILENAME_FMT)
+ ),
+
+ LogChannel('openttd', "OFTC", "#openttd",
+ LogDirectory(relpath('/home/spbot/irclogs/openttd'), LOG_TIMEZONE, LOG_PARSER, LOG_DECODER, LOG_FILENAME_FMT)
+ ),
+
+ LogChannel('test', "TEST", "#test",
+ LogDirectory(relpath('/home/spbot/irclogs/test'), LOG_TIMEZONE, LOG_PARSER, LOG_DECODER, LOG_FILENAME_FMT)
+ )
+])
+
+# URL to the hgweb installation for this code
+HGWEB_URL = "http://hg.qmsk.net/irclogs2"
+
+# path to the mercurial working copy
+HG_WC_PATH = "."
+
+# how to handle decode() errors for logfile lines
+LOG_SOURCE_DECODE_ERRORS = 'replace'
+
+# date format for URLs
+URL_DATE_FMT = '%Y-%m-%d'
+
+# month name format
+MONTH_FMT = '%B %Y'
+
+# timezone name format
+TIMEZONE_FMT = '%Z %z'
+
+# TTF fonts to use for drawing images
+FORMATTER_IMAGE_FONTS = {
+ # XXX: no unicode support
+ # 'default': (None, "Ugly default font" ),
+ 'ttf-dejavu-mono': ("/usr/share/fonts/truetype/ttf-dejavu/DejaVuSansMono.ttf", "DejaVu Sans Mono" ),
+ 'ttf-liberation-mono': ("/usr/share/fonts/truetype/ttf-liberation/LiberationMono-Regular.ttf", "Liberation Mono Regular" )
+}
+
+# available formatters
+LOG_FORMATTERS = {
+ 'irssi': IrssiFormatter,
+ 'debug': DebugFormatter,
+}
+
+# Cookie settings
+PREF_COOKIE_PATH = '/'
+PREF_COOKIE_EXPIRE_SECONDS = 1 * 365 * 24 * 60 * 60 # one year
+
+# default preferences
+PREF_TIME_FMT_DEFAULT = '%H:%M:%S'
+PREF_DATE_FMT_DEFAULT = '%Y-%m-%d'
+PREF_TIMEZONE_FALLBACK = pytz.utc
+PREF_FORMATTER_DEFAULT = IrssiFormatter
+PREF_COUNT_DEFAULT = 200
+PREF_COUNT_MAX = None
+PREF_IMAGE_FONT_DEFAULT = 'ttf-dejavu-mono'
+PREF_IMAGE_FONT_SIZE_DEFAULT = 12
+PREF_IMAGE_FONT_SIZE_MAX = 32
+
+# search line count options
+SEARCH_LINE_COUNT_OPTIONS = (
+ (50, 50),
+ (100, 100),
+ (200, 200),
+ (None, "∞"),
+)
+
+# search index database path
+SEARCH_INDEX_PATH = '/home/spbot/irclogs/search-index'
+SEARCH_AUTOINDEX_PATH = '/home/spbot/irclogs/search-autoindex'
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/qmsk/irclogs/error.py Sun Sep 13 01:15:56 2009 +0300
@@ -0,0 +1,183 @@
+"""
+ Build error messages
+"""
+
+import traceback, sys, cgi, urllib
+
+def truncate (msg, limit) :
+ """
+ Truncate the given message to <limit> chars
+ """
+
+ if len(msg) > limit :
+ return msg[:limit-3] + '...'
+
+ else :
+ return msg
+
+def build_link (title, url) :
+ return '<a href="%s">%s</a>' % (cgi.escape(url, True), cgi.escape(title))
+
+def build_error (exc_info=None, env=None) :
+ """
+ Dumps out a raw traceback of the given/current exception to stdout.
+
+    If env is given, it should be an environment dict, like under WSGI, and will be used to display additional info
+ about the request.
+
+ Returns a (status, content-type, body) tuple, with all components being non-unicode strs.
+ """
+
+ # default for exc_info is current exception
+ if not exc_info :
+ exc_info = sys.exc_info()
+
+ # request URL?
+ if env :
+ try :
+ from qmsk.web.http import request_url
+
+ url = request_url(env)
+
+ except :
+ # ignore
+ url = None
+ else :
+ url = None
+
+ # working copy path?
+ try :
+ from config import HG_WC_PATH, HGWEB_URL
+
+ wc_path = HG_WC_PATH
+ hgweb_url = HGWEB_URL
+
+ except :
+ # a good guess
+ wc_path = '.'
+ hgweb_url = None
+
+ # version?
+ try :
+ from version import version_string, version_link_hg
+
+ version = version_string(wc_path)
+
+ if hgweb_url :
+ version_href = version_link_hg(hgweb_url, wc_path)
+
+ else :
+ version_href = None
+
+ except :
+ version = None
+ version_href = None
+
+ # the exception type
+ exception_str = traceback.format_exception_only(*exc_info[:2])[-1]
+
+ # the exception traceback
+ traceback_lines = traceback.format_exception(*exc_info)
+
+    # XXX: make this configurable
+ trac_url = "http://projects.qmsk.net/irclogs2/trac"
+
+ # ticket list
+ trac_query = build_link("All tickets", "%s/query" % trac_url)
+
+ # submit ticket
+ submit_args = dict(type='defect')
+
+ # handle optional components
+ if url :
+ submit_args['url'] = url
+ trac_query_url = build_link("Same URL", "%s/query?url=%s" % (trac_url, urllib.quote(url)))
+ else :
+ trac_query_url = ""
+
+ if version :
+ submit_args['revision'] = version
+ trac_query_version = build_link("Same version", "%s/query?revision=%s" % (trac_url, urllib.quote(version)))
+
+ else :
+ trac_query_version = ""
+
+ if exception_str :
+ submit_args['summary'] = truncate(exception_str, 140)
+ trac_query_err = build_link("Same error", "%s/query?summary=%s" % (trac_url, urllib.quote(exception_str.rstrip())))
+
+ else :
+ trac_query_err = ""
+
+ if traceback_lines :
+ # this is big
+ submit_args['description'] = """\
+[Insert any additional information here]
+
+
+= Traceback =
+{{{
+%s
+}}}""" % ''.join(traceback_lines)
+
+ # the trac newticket URL
+ submit_url = "%s/newticket?%s" % (trac_url, '&'.join('%s=%s' % (urllib.quote(k), urllib.quote(v)) for k, v in submit_args.iteritems()))
+
+ # return
+ return ('500 Internal Server Error', 'text/html; charset=UTF-8', ("""\
+<html><head><title>500 Internal Server Error</title></head><body>
+<h1>Oops!</h1>
+<p>
+ An error occured, which was not logged, and was not reported to anybody. It might be your fault, or it might be mine.
+</p>
+
+<p>
+ You can try:
+ <ol style="list-style-type: lower-alpha">
+ <li><strong>Poking</strong> the administrator of this site to see if they respond</li>
+ <li><strong>Looking</strong> for similar issue tickets with:
+ <ul>
+ <li>%(trac_query)s</li>
+ <li>%(trac_query_url)s</li>
+ <li>%(trac_query_version)s</li>
+ <li>%(trac_query_err)s</li>
+ </ul>
+ </li>
+ <li><strong>Submitting</strong> a new ticket using the following link (quick & easy):</li>
+ </ol>
+</p>
+<pre>
+ <a href="%(submit_url)s">%(submit_url_short)s</a>
+</pre>
+
+<h2>Details:</h2>
+<p>The page you tried to request was:</p>
+<pre>
+ %(url)s
+</pre>
+
+<p>The software version is:</p>
+<pre>
+ %(version_link)s
+</pre>
+
+<p>The error was:</p>
+<pre>
+ %(exception)s
+</pre>
+
+<p>The traceback was:</p>
+<pre>%(traceback)s</pre>
+</body></html>""" % dict(
+ url = url if url else 'Unknown',
+ version_link = version_href if version_href else 'Unknown',
+ exception = truncate(exception_str, 512),
+ traceback = cgi.escape(''.join(' ' + line for line in traceback_lines)),
+ trac_query = trac_query,
+ trac_query_url = trac_query_url,
+ trac_query_version = trac_query_version,
+ trac_query_err = trac_query_err,
+ submit_url = submit_url,
+ submit_url_short = truncate(submit_url, 120)
+ )).encode('utf-8'))
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/qmsk/irclogs/handlers.py Sun Sep 13 01:15:56 2009 +0300
@@ -0,0 +1,292 @@
+"""
+ Our URL action handlers
+"""
+
+import datetime, calendar, pytz
+
+from qmsk.web import http, template
+
+import urls, channels, helpers
+import preferences as prefs
+from preferences import preferences
+import config, log_search
+
+# load templates from here
+templates = template.TemplateLoader("templates",
+ _helper_class = helpers.Helpers,
+ urls = urls,
+ channel_list = config.LOG_CHANNELS,
+ config = config,
+)
+
+# return a http.Response for the given text in the given format
+def _render_type (request, channel, lines, type, full_timestamps=False) :
+ """
+ Render the given LogLines as a http.Response in the given format, which is one of:
+ html - XXX: not supported
+ txt - Plaintext
+ png - PNG image
+ rss - RSS feed
+ """
+
+ # load related preferences
+ formatter = request.prefs['formatter']
+
+ kwargs = dict(
+ full_timestamps = full_timestamps
+ )
+
+ # we can render in various modes...
+ if type in ('html', None) :
+ xxx
+
+ elif type == 'txt' :
+ # plaintext
+ lines = formatter.format_txt(lines, **kwargs)
+
+ # build data
+ data = '\n'.join(data for line, data in lines)
+
+ return http.Response(data, 'text/plain')
+
+ elif type == 'png' :
+ # PNG image
+ png_data = formatter.format_png(lines, **kwargs)
+
+ return http.Response(png_data, 'image/png', charset=None)
+
+ elif type == 'rss' :
+ # RSS feed
+ rss_data = formatter.format_rss(lines, **kwargs)
+
+ # XXX: fix to render as unicode?
+ return http.Response(rss_data, 'application/rss+xml', charset=None)
+
+ else :
+ raise http.ResponseError("Unrecognized type: %r" % (type, ))
+
+def _render_date (request, channel, date, lines, type, count, page, max) :
+ """
+ Render the given LogLines as a http.Response for channel_date
+ """
+
+ # type?
+ if type :
+ # special type
+ return _render_type(request, channel, lines, type)
+
+ else :
+ # format HTML
+ lines = request.prefs['formatter'].format_html(lines)
+
+ # render
+ return templates.render_to_response("channel_date",
+ req = request,
+ prefs = request.prefs,
+ channel = channel,
+ date = date,
+ count = count,
+ page = page,
+ max = max,
+ lines = lines,
+
+ # for prev/next date
+ date_next = channel.source.get_next_date(date),
+ date_prev = channel.source.get_prev_date(date),
+ )
+
+@preferences.handler()
+def index (request) :
+ """
+ The topmost index page, display a list of available channels, perhaps some general stats
+ """
+
+ return templates.render_to_response("index",
+ req = request,
+ prefs = request.prefs,
+ )
+
+# XXX: fix this namespace crap
+@preferences.handler()
+def preferences_ (request) :
+ """
+ Preferences editor
+ """
+
+ # POST?
+ if request.is_post() :
+ # update any modified preferences
+ for pref in preferences.pref_list :
+ # get the POST'd value, default = None
+ post_value = request.get_post(pref.name, None)
+
+ # skip non-specified values
+ # XXX: this is to not clobber timezone_offset to None
+ if post_value is None :
+ continue
+
+ # parse the POST'd value, None -> default
+ new_value = request.prefs.parse(pref, post_value)
+
+ # update if given and changed
+ if new_value != request.prefs[pref] :
+ request.prefs.set(pref.name, new_value)
+
+ # render
+ return templates.render_to_response("preferences",
+ req = request,
+ prefs = request.prefs,
+ preferences = prefs,
+ )
+
+def channel_select (request, channel) :
+ """
+ Redirect to the appropriate channel_view
+ """
+
+ return http.Redirect(urls.channel.build(request, channel=channel))
+
+@preferences.handler(prefs.formatter)
+def channel_last (request, channel, count, formatter, type=None) :
+ """
+ The main channel view page, displaying the most recent lines
+ """
+
+ # get latest events
+ lines = channel.source.get_latest(count)
+
+ # type?
+ if type :
+ # other format
+ return _render_type(request, channel, lines, type)
+
+ else :
+ # format HTML
+ lines = formatter.format_html(lines)
+
+ # render page
+ return templates.render_to_response("channel_last",
+ req = request,
+ prefs = request.prefs,
+ channel = channel,
+ count = count,
+ lines = lines,
+ )
+
+@preferences.handler(prefs.formatter, prefs.timezone, prefs.count)
+def channel_link (request, channel, timestamp, formatter, timezone, count, type=None) :
+ """
+ Display channel_date for specific UTC timestamp
+ """
+
+ # convert timestamp to user's timezone
+ timestamp = timestamp.astimezone(timezone)
+
+ # get correct day's correct page of lines
+ page, max, lines = channel.source.get_date_paged(timestamp, count)
+
+ # render channel_date
+ return _render_date (request, channel, timestamp, lines, type, count, page, max)
+
+@preferences.handler(prefs.timezone)
+def channel_calendar (request, channel, year, month, timezone) :
+ """
+    Display a list of available logs for some month
+ """
+
+ # current date as default
+ now = timezone.localize(datetime.datetime.now())
+
+ # target year/month
+ target = timezone.localize(datetime.datetime(
+ year = year if year else now.year,
+ month = month if month else now.month,
+ day = 1
+ ))
+
+ # display calendar
+ return templates.render_to_response("channel_calendar",
+ req = request,
+ prefs = request.prefs,
+ channel = channel,
+ month = target,
+ )
+
+@preferences.handler(prefs.count, prefs.timezone)
+def channel_date (request, channel, date, count, timezone, page=1, type=None) :
+ """
+ Display all log data for the given date
+ """
+
+ # convert date to user's timezone
+ date = timezone.localize(date)
+
+# print
+# print "channel_date: date=%s" % date
+
+ # get that day's events, either paged or not
+ if page :
+ page, max, lines = channel.source.get_date_paged(date, count, page)
+
+ else :
+ lines = channel.source.get_date(date)
+ max = None
+
+ # render channel_date
+ return _render_date (request, channel, date, lines, type, count, page, max)
+
+@preferences.handler(prefs.formatter, prefs.count)
+def channel_search (request, channel, formatter, count, q=None, page=1, max=1, type=None, t=None) :
+ """
+ Display the search form for the channel for GET, or do the search for POST.
+ """
+
+ # calculate skip offset from page/count
+ skip = (page - 1) * count
+
+ # got a search query?
+ if q :
+ # attribute targets
+ targets = dict(('search_%s' % target, True) for target in t if target in ('msg', 'nick')) if t else {}
+
+ try :
+ # do search
+ lines = log_search.get_index().search_simple(channel, q, count, skip, **targets)
+
+ # update max?
+ if max and page > max :
+ max = page
+
+ except log_search.NoResultsFound :
+ # no results
+ lines = None
+
+ else :
+ # just display the search form
+ lines = None
+
+ # type?
+ if type and lines :
+ # special type
+ return _render_type(request, channel, lines, type, full_timestamps=True)
+
+ else :
+ # format lines to HTML if any
+ if lines :
+ # format
+ lines = formatter.format_html(lines, full_timestamps=True)
+
+ # render page
+ return templates.render_to_response("channel_search",
+ req = request,
+ prefs = request.prefs,
+ channel = channel,
+ search_query = q,
+ search_targets = t,
+ count = count,
+ page = page,
+ skip = skip,
+ max = max,
+ lines = lines,
+ )
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/qmsk/irclogs/helpers.py Sun Sep 13 01:15:56 2009 +0300
@@ -0,0 +1,243 @@
+"""
+ Some additional helpers
+"""
+
+import datetime
+import calendar as _calendar
+
+import qmsk.web.helpers
+
+import preferences, urls, config, version
+
+class Helpers (qmsk.web.helpers.Helpers) :
+ """
+ Our set of helpers, inheriting from base helpers
+ """
+
+    # set constructor...
+ set = set
+
+ # reference to calendar instance
+ calendar = _calendar.Calendar()
+
+ # list of (month_num, month_name) for the months in the year
+ months = list(enumerate(_calendar.month_name))[1:]
+
+ def version_link (self) :
+ """
+ Returns a <a href> representing this version of the software
+ """
+
+ return version.version_link_hg(config.HGWEB_URL, config.HG_WC_PATH)
+
+ def tz_name (self, tz) :
+ """
+ Returns a string describing the given timezone
+ """
+
+ return self.now().strftime(config.TIMEZONE_FMT)
+
+ def fmt_month (self, date) :
+ """
+ Formats a month
+ """
+
+ return date.strftime(config.MONTH_FMT)
+
+ def fmt_weekday (self, wday) :
+ """
+ Formats an abbreviated weekday name
+ """
+
+ return _calendar.day_abbr[wday]
+
+ def build_date (self, month, mday) :
+ """
+ Returns a datetime.datetime for the given (month.year, month.month, mday)
+ """
+
+ return self.ctx['prefs'][preferences.timezone].localize(datetime.datetime(month.year, month.month, mday))
+
+ def now (self) :
+ """
+ Build current time
+ """
+
+ return self.ctx['prefs'][preferences.timezone].localize(datetime.datetime.now())
+
+ def today (self) :
+ """
+ Build today's date
+ """
+
+ return self.now().date()
+
+ def is_today (self, dt) :
+ """
+ Checks if the given datetime.datetime is today
+ """
+
+ # compare with current date
+ return dt.date() == self.today()
+
+ def is_this_month (self, month) :
+ """
+ Checks the given month is the current month
+ """
+
+ today = self.today()
+
+ return (month.year == today.year and month.month == today.month)
+
+ @staticmethod
+ def _wrap_year (year, month) :
+ """
+ Wraps month to between [1, 12], spilling overflow/underflow by to year.
+
+ Returns (year, month)
+ """
+
+ # underflow?
+ if month == 0 :
+ # wrap to previous year
+ return (year - 1, 12)
+
+ # overflow?
+ elif month == 13 :
+ # wrap to next year
+ return (year + 1, 1)
+
+ # sane value
+ elif 1 <= month <= 12 :
+ return (year, month)
+
+ # insane value
+ else :
+ assert False, "invalid year/month: %d/%d" % (year, month)
+
+ def prev_month (self, month) :
+ """
+ Returns the month preceding the given one (as a datetime.datetime)
+ """
+
+ # previous month
+ y, m = self._wrap_year(month.year, month.month - 1)
+
+ # build datetime
+ return datetime.datetime(year=y, month=m, day=1, tzinfo=month.tzinfo)
+
+ def next_month (self, month) :
+ """
+ Returns the month following the given one (as a datetime.datetime)
+ """
+
+        # next month
+ y, m = self._wrap_year(month.year, month.month + 1)
+
+ # build datetime
+ return datetime.datetime(year=y, month=m, day=1, tzinfo=month.tzinfo)
+
+ def fmt_time (self, time=None) :
+ """
+ Format given time, or current time
+ """
+
+ # defaults
+ if not time :
+ time = self.now()
+
+ return time.strftime(self.ctx['prefs'][preferences.time_format])
+
+ def fmt_date (self, date=None) :
+ """
+ Format given date, or current date
+ """
+
+ # defaults
+ if not date :
+ date = self.now()
+
+ return date.strftime(self.ctx['prefs'][preferences.date_format])
+
+ def url (self, url, **params) :
+ """
+ Build URL with our request object
+ """
+
+ return url.build(self.ctx['req'], **params)
+
+ # old name
+ build_url = url
+
+ def utc_timestamp (self, dtz) :
+ """
+ Build an UTC timestamp from the given datetime
+ """
+
+ return urls.types['ts'].build(dtz)
+
+ def skip_next (self, count, skip) :
+ """
+ Return skip offset for next page
+ """
+
+ return count + skip
+
+ def skip_page (self, count, page) :
+ """
+ Skip to page
+ """
+
+ if page :
+ return count * page
+
+ else :
+ return None
+
+ def skip_prev (self, count, skip) :
+ """
+ Return skip offset for previous page, None for first page
+ """
+
+ if skip > count :
+ return skip - count
+
+ else :
+ return None
+
+ def max (self, *values) :
+ """
+ Returns the largest of the given values
+ """
+
+ return max(values)
+
+ def select_options (self, key_values, selected_key=None) :
+ """
+ Render a series of <option> tags for <select>.
+
+ The given key_values is an iterable of (key, value) pairs, key may be None if it's the same as value.
+ """
+
+ return '\n'.join(
+ '\t<option%s%s>%s</option>' % (
+ ' value="%s"' % key if key is not None else '',
+ ' selected="selected"' if (key if key is not None else value) == selected_key else '',
+ value
+ ) for key, value in key_values
+ )
+
+ def prev_date (self, date) :
+ """
+ Returns the previous date for the given datetime-date
+ """
+
+ return datetime.datetime(date.year, date.month, date.day, tzinfo=date.tzinfo) - datetime.timedelta(days=1)
+
+ def next_date (self, date) :
+ """
+        Returns the next date for the given datetime-date
+ """
+
+ return datetime.datetime(date.year, date.month, date.day, tzinfo=date.tzinfo) + datetime.timedelta(days=1)
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/qmsk/irclogs/log_channel.py Sun Sep 13 01:15:56 2009 +0300
@@ -0,0 +1,54 @@
+"""
+ A channel represents a series of log events, stored in some log source
+"""
+
+import log_search
+
+class LogChannel (object) :
+ """
+ A single IRC channel, logged to some specific place
+ """
+
+ def __init__ (self, id, network, name, source) :
+ """
+ Initialize this channel from the given identifier key, network name, channel name, and LogSource
+ """
+
+ # store
+ self.id = id
+ self.network = network
+ self.name = name
+ self.source = source
+
+ # bind source
+ self.source.bind_channel(self)
+
+ @property
+ def title (self) :
+ """
+ Title is 'Network - #channel'
+ """
+
+ return "%s - %s" % (self.network, self.name)
+
+ def search (self, query) :
+ """
+ Perform a search on this channel, returning a sequence of LogLines
+ """
+
+ return log_search.index.search_simple(self, query)
+
+ def __str__ (self) :
+ """
+ Returns self.title
+ """
+
+ return self.title
+
+ def __repr__ (self) :
+ """
+ Uses self.id
+ """
+
+ return "LogChannel(%s)" % (self.id, )
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/qmsk/irclogs/log_formatter.py Sun Sep 13 01:15:56 2009 +0300
@@ -0,0 +1,257 @@
+"""
+ Format LogLines into some other representation
+"""
+
+import re, xml.sax.saxutils
+
+from log_line import LogTypes
+from log_formatter_pil import PILImageFormatter
+from log_formatter_rss import RSSFormatter
+
+class LogFormatter (object) :
+ """
+ Provides a method to format series of LogLines into various output formats, with varying themes.
+ """
+
+ # machine-readable name
+ name = None
+
+ # human-readable name
+ title = None
+
+ ## parameters
+ # use a fixed-width font for HTML output
+ html_fixedwidth = True
+
+ def __init__ (self, tz, timestamp_fmt, img_ttf_path, img_font_size) :
+ """
+        Initialize to format timestamps with the given timezone and timestamp format.
+
+ Use the given TTF font to render image text with the given size, if given, otherwise, a default one.
+ """
+
+ # store
+ self.tz = tz
+ self.timestamp_fmt = timestamp_fmt
+ self.img_ttf_path = img_ttf_path
+ self.img_font_size = img_font_size
+
+        # XXX: hardcoded
+ self.date_fmt = '%Y-%m-%d'
+
+ def _format_line_text (self, line, template_dict, type=None, full_timestamp=False, **extra) :
+ """
+ Format the given line as text, using the given { type: string template } dict.
+
+ If type is given, then it overrides line.type
+
+ Any additional keyword args will also be available for the template to use
+ """
+
+ # default type?
+ if type is None :
+ type = line.type
+
+ # look up the template
+ if type in template_dict :
+ template = template_dict[type]
+
+ else :
+ raise Exception("Format template not defined for type: %s" % LogTypes.name_from_code(type))
+
+ # convert timestamp into display timezone
+ dtz = line.timestamp.astimezone(self.tz)
+
+ # full timestamps?
+ if full_timestamp :
+ # XXX: let the user define a 'datetime' format instead?
+ timestamp_fmt = self.date_fmt + ' ' + self.timestamp_fmt
+
+ else :
+ timestamp_fmt = self.timestamp_fmt
+
+ # breakdown source
+ source_nickname, source_username, source_hostname, source_chanflag = line.source
+ target_nickname = line.target
+
+ # format with dict
+ return template % dict(
+ channel_name = line.channel.name,
+ datetime = dtz.strftime('%a %b %d %H:%M:%S %Y'),
+ date = dtz.strftime(self.date_fmt),
+ timestamp = dtz.strftime(timestamp_fmt),
+ source_nickname = source_nickname,
+ source_username = source_username,
+ source_hostname = source_hostname,
+ source_chanflag = source_chanflag,
+ target_nickname = target_nickname,
+ message = line.data,
+ **extra
+ )
+
+ def format_txt (self, lines, full_timestamps=False) :
+ """
+ Format given lines as plaintext.
+
+ If full_timestamps is given, the output will contain full timestamps with both date and time.
+
+ No trailing newlines.
+ """
+
+ abstract
+
+ def format_html (self, lines, full_timestamps=False) :
+ """
+ Format as HTML.
+
+ See format_txt for information about arguments
+ """
+
+ abstract
+
+ def format_png (self, lines, full_timestamps=False) :
+ """
+ Format as a PNG image, returning the binary PNG data
+ """
+
+ abstract
+
+ def format_rss (self, lines, full_timestamps=False) :
+ """
+ Format as an XML RSS document
+ """
+
+ abstract
+
+class BaseHTMLFormatter (LogFormatter) :
+ """
+ Implements some HTML-formatting utils
+ """
+
+ # parameters
+ html_fixedwidth = True
+
+ # regexp to match URLs
+ URL_REGEXP = re.compile(r"http://\S+")
+
+ def _process_links (self, line) :
+ """
+        Processes the rendered line, adding in <a href>'s for things that look like URLs, returning the new line.
+
+ The line should already be escaped
+ """
+
+ def _encode_url (match) :
+ # encode URL
+ url_html = match.group(0)
+ url_link = xml.sax.saxutils.unescape(url_html)
+
+ return '<a href="%(url_link)s">%(url_html)s</a>' % dict(url_link=url_link, url_html=url_html)
+
+ return self.URL_REGEXP.sub(_encode_url, line)
+
+ def format_html (self, lines, **kwargs) :
+ """
+ Just uses format_txt, but processes links, etc
+ """
+
+ # format using IrssiTextFormatter
+ for line, txt in self.format_txt(lines, **kwargs) :
+ # escape HTML
+ html = xml.sax.saxutils.escape(txt)
+
+ # process links
+ html = self._process_links(html)
+
+ # yield
+ yield line, html
+
+
+class IrssiTextFormatter (RSSFormatter, PILImageFormatter, LogFormatter) :
+ """
+ Implements format_txt for irssi-style output
+ """
+
+ # format definitions by type
+ __FMT = {
+ LogTypes.RAW : "%(timestamp)s %(data)s",
+ LogTypes.LOG_OPEN : "--- Log opened %(datetime)s",
+ LogTypes.LOG_CLOSE : "--- Log closed %(datetime)s",
+ 'DAY_CHANGED' : "--- Day changed %(date)s",
+
+ LogTypes.MSG : "%(timestamp)s <%(source_chanflag)s%(source_nickname)s> %(message)s",
+ LogTypes.NOTICE : "%(timestamp)s -%(source_nickname)s- %(message)s",
+ LogTypes.ACTION : "%(timestamp)s * %(source_nickname)s %(message)s",
+
+ LogTypes.JOIN : "%(timestamp)s -!- %(source_nickname)s [%(source_username)s@%(source_hostname)s] has joined %(channel_name)s",
+ LogTypes.PART : "%(timestamp)s -!- %(source_nickname)s [%(source_username)s@%(source_hostname)s] has left %(channel_name)s [%(message)s]",
+ LogTypes.KICK : "%(timestamp)s -!- %(target_nickname)s was kicked from %(channel_name)s by %(source_nickname)s [%(message)s]",
+ LogTypes.MODE : "%(timestamp)s -!- mode/%(channel_name)s [%(message)s] by %(source_nickname)s",
+
+ LogTypes.NICK : "%(timestamp)s -!- %(source_nickname)s is now known as %(target_nickname)s",
+ LogTypes.QUIT : "%(timestamp)s -!- %(source_nickname)s [%(source_username)s@%(source_hostname)s] has quit [%(message)s]",
+
+ LogTypes.TOPIC : "%(timestamp)s -!- %(source_nickname)s changed the topic of %(channel_name)s to: %(message)s",
+ 'TOPIC_UNSET' : "%(timestamp)s -!- Topic unset by %(source_nickname)s on %(channel_name)s",
+
+ LogTypes.SELF_NOTICE: "%(timestamp)s -%(source_nickname)s- %(message)s",
+ LogTypes.SELF_NICK : "%(timestamp)s -!- %(source_nickname)s is now known as %(target_nickname)s",
+
+ LogTypes.NETSPLIT_START :
+ "%(timestamp)s -!- Netsplit %(source_hostname)s <-> %(target_nickname)s quits: %(_netsplit_targets)s",
+ LogTypes.NETSPLIT_END :
+ "%(timestamp)s -!- Netsplit over, joins: %(_netsplit_targets)s",
+ }
+
+ def format_txt (self, lines, full_timestamps=False) :
+ # ...handle each line
+ for line in lines :
+ # extra args
+ extra = {}
+
+ # default to line.type
+ type = line.type
+
+ # special formatting for unset-Topic
+ if line.type == LogTypes.TOPIC and line.data is None :
+ type = 'TOPIC_UNSET'
+
+ # format netsplit stuff
+ elif line.type & LogTypes._NETSPLIT_MASK :
+ # format the netsplit-targets stuff
+ extra['_netsplit_targets'] = line.data
+
+ # using __TYPES
+ yield line, self._format_line_text(line, self.__FMT, type, full_timestamps, **extra)
+
+class IrssiFormatter (BaseHTMLFormatter, IrssiTextFormatter) :
+ """
+ Implements plain black-and-white irssi-style formatting
+ """
+
+ # name
+ name = 'irssi'
+ title = "Irssi (plain)"
+
+class DebugFormatter (BaseHTMLFormatter) :
+ """
+ Implements a raw debug-style formatting of LogLines
+ """
+
+ # name
+ name = 'debug'
+ title = "Raw debugging format"
+
+ def format_txt (self, lines, full_timestamps=False) :
+ # iterate
+ for line in lines :
+ # just dump
+ yield line, unicode(line)
+
+def by_name (name) :
+ """
+ Lookup and return a class LogFormatter by name
+ """
+
+ return FORMATTERS[name]
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/qmsk/irclogs/log_formatter_pil.py Sun Sep 13 01:15:56 2009 +0300
@@ -0,0 +1,81 @@
+"""
+ Use of PIL to render the image formatting stuff
+"""
+
+from PIL import Image, ImageDraw, ImageFont
+
+from cStringIO import StringIO
+
class PILImageFormatter (object) :
    """
    Mixin for LogFormatter that implements the basic image-rendering operations on top of format_txt
    """

    # the lazily-loaded ImageFont, cached after the first _load_font call
    font = None

    # vertical gap between rendered lines, in pixels
    LINE_SPACING = 1

    def _load_font (self) :
        """
        Return the font to render with, loading it on first use.

        Uses the configured img_ttf_path for a TrueType font of size img_font_size,
        or PIL's built-in default font if no path is configured.
        """

        if self.font :
            # already loaded
            pass

        elif self.img_ttf_path :
            # load truetype with configured size
            self.font = ImageFont.truetype(self.img_ttf_path, self.img_font_size)

        else :
            # PIL's built-in bitmap font
            self.font = ImageFont.load_default()

        return self.font

    def format_png (self, lines, **kwargs) :
        """
        Build and return a PNG image of the given lines, using format_txt.

        Raises ValueError if format_txt yields no lines at all.
        """

        # load font
        font = self._load_font()

        # build list of plain-text line data
        texts = list(data for line, data in self.format_txt(lines, **kwargs))

        # guard empty input explicitly; otherwise max() below fails with a confusing message
        if not texts :
            raise ValueError("no lines to render")

        # per-line (width, height) in pixels
        text_sizes = [font.getsize(text) for text in texts]

        # image is as wide as the widest line, and tall enough to stack all lines + spacing
        img_width = max(w for w, h in text_sizes)
        img_height = sum(h + self.LINE_SPACING for w, h in text_sizes)

        # create new greyscale ('L') image with a white (0xff) background
        img = Image.new('L', (img_width, img_height), 0xff)

        # drawer
        draw = ImageDraw.Draw(img)

        # vertical position of the next line
        offset_y = 0

        # draw the lines, top to bottom
        for text, (w, h) in zip(texts, text_sizes) :
            draw.text((0, offset_y), text, font=font)

            # advance past this line plus spacing
            offset_y += h + self.LINE_SPACING

        # render the PNG into an in-memory buffer
        buf = StringIO()
        img.save(buf, 'png')

        # return the raw PNG bytes
        return buf.getvalue()
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/qmsk/irclogs/log_formatter_rss.py Sun Sep 13 01:15:56 2009 +0300
@@ -0,0 +1,41 @@
+"""
+ Uses PyRSS2Gen to generate XML RSS documents
+"""
+
+import PyRSS2Gen as RSS2Gen
+import datetime, pytz
+
class RSSFormatter (object) :
    """
    Mixin for LogFormatter that implements the basic RSS-rendering stuff on top of format_html
    """

    def format_rss (self, lines, **kwargs) :
        """
        Render the given lines as an RSS2 XML document, building one item per format_html result
        """

        # one RSSItem per formatted line
        items = []

        for line, html_data in self.format_html(lines, **kwargs) :
            items.append(RSS2Gen.RSSItem(
                # use the formatted HTML data as the title
                title = html_data,

                # timestamp
                pubDate = line.timestamp.astimezone(pytz.utc),

                # link
                link = "http://xxx/",
            ))

        # build the RSS2 object and return the XML
        return RSS2Gen.RSS2(
            title = "IRC RSS feed",
            link = "http://irclogs.qmsk.net/",
            description = "A stupid RSS feed that nobody sane would ever use",

            # XXX: GMT
            lastBuildDate = datetime.datetime.utcnow(),

            items = items,
        ).to_xml('utf8')
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/qmsk/irclogs/log_line.py Sun Sep 13 01:15:56 2009 +0300
@@ -0,0 +1,186 @@
+"""
+ An IRC logfile consists of a series of lines/events
+"""
+
class LogTypes :
    """
    Definitions of the various LogLines types:

        LogTypes.RAW
        LogTypes.LOG_OPEN
        LogTypes.LOG_CLOSE

        LogTypes.MSG
        LogTypes.NOTICE
        LogTypes.ACTION

        LogTypes.JOIN
        LogTypes.PART
        LogTypes.KICK
        LogTypes.MODE

        LogTypes.NICK
        LogTypes.QUIT

        LogTypes.TOPIC

        LogTypes.SELF_NOTICE
        LogTypes.SELF_NICK

    The attributes themselves are attached to this class by the module-level
    loop below, one per (name, code) pair in LIST.
    """

    # list of (name, code) LogType values; codes are grouped by high nibble
    # (0x0X special, 0x1X messages, 0x2X user-channel, 0x3X user status,
    #  0x4X channel status, 0x5X own actions, 0x6X netsplits)
    LIST = [
        ## special
        # unknown type, may or may not have a timestamp, no source, only data
        ('RAW', 0x01),

        # log opened
        ('LOG_OPEN', 0x02),

        # log closed
        ('LOG_CLOSE', 0x03),

        ## messages
        # <source> sent message <data> to <channel>
        ('MSG', 0x10),

        # <source> sent notice with message <data> to <channel>
        ('NOTICE', 0x11),

        # <source> sent CTCP action with message <data> to <channel>
        ('ACTION', 0x12),

        ## user-channel stats
        # <source> joined <channel>
        ('JOIN', 0x21),

        # <source> left <channel> with message <data>
        ('PART', 0x22),

        # <source> kicked <target> from <channel> with message <data>
        ('KICK', 0x25),

        # <source> changed modes on <channel> with modestring <data>
        ('MODE', 0x26),

        ## user status
        # <source> changed nickname to <target>
        ('NICK', 0x31),

        # <source> quit the network with quit-message <data>
        ('QUIT', 0x32),

        ## general channel status
        # <source> changed the topic of <channel> to <data>
        # data may be None if the topic was unset
        ('TOPIC', 0x41),

        ## our own actions
        # we (<source>) sent a notice with message <data> to <channel>
        ('SELF_NOTICE', 0x51),

        # we (<source>) changed nickname to <target>
        ('SELF_NICK', 0x52),

        ## slightly weirder bits
        # netsplit between <source_hostname> and <target_hostname>, <data> is a space-separated list of <chanflags><nickname>s affected
        # the last item in the list of nicknames may also be of the form "+<count>", where count is the number of additional, but hidden, nicknames affected
        ('NETSPLIT_START', 0x61),

        # netsplit over, <data> is a list of users affected, see NETSPLIT_START
        # (0x062 == 0x62, keeping it inside the 0x60 netsplit block)
        ('NETSPLIT_END', 0x062),
    ]

    @classmethod
    def name_from_code (cls, code) :
        """
        Looks up a LogType name by code.

        Rebuilds the reverse code->name mapping on each call; raises KeyError
        for unknown codes.
        """

        return dict((type, name) for name, type in cls.LIST)[code]
+
# apply as attributes
# (attach each (name, code) pair from LogTypes.LIST as a LogTypes.<NAME> class attribute)
for name, code in LogTypes.LIST :
    setattr(LogTypes, name, code)

# masks
# both NETSPLIT_START (0x61) and NETSPLIT_END (0x62) match `code & _NETSPLIT_MASK`
LogTypes._NETSPLIT_MASK = 0x60
+
class LogLine (object) :
    """
    An event on some specific channel
    """

    # the LogChannel this line belongs to
    channel = None

    # the offset, only guaranteed to be unique for a specific channel and date
    offset = None

    # the event type, as defined in LogTypes
    type = None

    # the UTC timestamp of the event
    timestamp = None

    # the source, this should be a (nickname, username, hostname, chanflags) tuple
    source = None

    # possible target nickname for certain types (kick, nick)
    target = None

    # associated data (message, etc)
    data = None

    def __init__ (self, channel, offset, type, timestamp, source, target, data) :
        """
        Store the given values on this instance
        """

        self.channel = channel
        self.offset = offset
        self.type = type
        self.timestamp = timestamp
        self.source = source
        self.target = target
        self.data = data

    def format_type (self) :
        """
        Formats type as a string code
        """

        return LogTypes.name_from_code(self.type)

    def format_source (self) :
        """
        Formats source as [<chanflags>][<nickname>][!<username>][@<hostname>], omitting those parts that are missing.

        If all parts are None, this returns the empty string
        """

        nick, user, host, flags = self.source

        # collect the present parts in display order
        parts = []

        # a single-space chanflag counts as absent
        if flags and flags != ' ' :
            parts.append(flags)

        if nick :
            parts.append(nick)

        if user :
            parts.append('!' + user)

        if host :
            parts.append('@' + host)

        return ''.join(parts)

    def __unicode__ (self) :
        # tab-separated dump of every field
        fields = (
            self.channel.name,
            str(self.offset),
            self.format_type(),
            str(self.timestamp),
            self.format_source(),
            str(self.target),
            unicode(self.data),
        )

        return '\t'.join(fields)

    def __repr__ (self) :
        return "LogLine(%r, %s, %-12s, %s, %-35s, %-10s, %r)" % (
            self.channel, self.offset, self.format_type(), self.timestamp, self.format_source(), self.target, self.data
        )
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/qmsk/irclogs/log_parser.py Sun Sep 13 01:15:56 2009 +0300
@@ -0,0 +1,233 @@
+"""
+ Parse log data into log_events
+"""
+
+import re
+import datetime
+
+from log_line import LogTypes, LogLine
+
class LogParseError (Exception) :
    """
    Parsing some line failed
    """

    def __init__ (self, line, offset, message) :
        # describe the offending line, its offset and the failure in one message
        detail = "%r@%s: %s" % (line, offset, message)

        super(LogParseError, self).__init__(detail)
+
class LogParser (object) :
    """
    Abstract interface
    """

    def __init__ (self, tz, timestamp_fmt="%H:%M:%S") :
        """
        Setup the parser to use the given format for line timestamps, which are of the given timezone
        """

        # timezone the line timestamps are in
        self.tz = tz

        # strptime format for line timestamps
        self.timestamp_fmt = timestamp_fmt

    def parse_lines (self, channel, lines, date=None, starting_offset=None) :
        """
        Parse the given (iterable) lines of unicode text into a LogEvent, no trailing newline.

        Channel is the LogChannel that these lines belong to.

        Offset is the starting offset, and may be None to not use it.

        Giving date lets the parser build full timestamps, otherwise, unless line timestamps have full date
        information, event timestamps will have a date component of 1900/1/1.
        """

        # subclasses must override; referencing this undefined name raises NameError
        abstract
+
class IrssiParser (LogParser) :
    """
    A parser for irssi logfiles.

    Matches each line against an ordered table of per-type regular expressions and
    builds LogLines from the named groups of the first pattern that matches.
    """

    # timestamp prefix, with trailing space
    _TS = r'(?P<timestamp>[a-zA-Z0-9: ]+[a-zA-Z0-9])\s*'

    # subexpression parts, shared between the patterns below
    _NICK = r'(?P<nickname>.+?)'
    _NICK2 = r'(?P<nickname2>.+?)'
    _TARGET = r'(?P<target>.+?)'
    _CHAN = r'(?P<channel>.+?)'
    _CHAN2 = r'(?P<channel2>.+?)'
    _USERHOST = r'(?P<username>.*?)@(?P<hostname>.*?)'
    _MSG = r'(?P<message>.*)'
    _SRV1 = r'(?P<server1>.+?)'
    _SRV2 = r'(?P<server2>.+?)'

    # regular expressions for matching lines, by type; tried in order, first match wins
    TYPE_EXPRS = (
        ( LogTypes.LOG_OPEN, r'--- Log opened (?P<datetime>.+)' ),
        ( LogTypes.LOG_CLOSE, r'--- Log closed (?P<datetime>.+)' ),
        ( LogTypes.MSG, _TS + r'<(?P<flags>.)' + _NICK + '> ' + _MSG ),
        ( LogTypes.NOTICE, _TS + r'-' + _NICK + ':' + _CHAN + '- ' + _MSG ),
        ( LogTypes.ACTION, _TS + r'\* ' + _NICK + ' ' + _MSG ),
        ( LogTypes.JOIN, _TS + r'-!- ' + _NICK + ' \[' + _USERHOST + '\] has joined ' + _CHAN ),
        ( LogTypes.PART, _TS + r'-!- ' + _NICK + ' \[' + _USERHOST + '\] has left ' + _CHAN + ' \[(?P<message>.*?)\]' ),
        ( LogTypes.KICK, _TS + r'-!- ' + _TARGET + ' was kicked from ' + _CHAN + ' by ' + _NICK + ' \[(?P<message>.*?)\]' ),
        # XXX: use hostname instead of nickname for ServerMode
        ( LogTypes.MODE, _TS + r'-!- (mode|ServerMode)/' + _CHAN + ' \[(?P<mode>.+?)\] by (?P<nickname>\S+)' ),
        ( LogTypes.NICK, _TS + r'-!- ' + _NICK + ' is now known as (?P<target>\S+)' ),
        ( LogTypes.QUIT, _TS + r'-!- ' + _NICK + ' \[' + _USERHOST + '\] has quit \[(?P<message>.*?)\]' ),
        ( LogTypes.TOPIC, _TS + r'-!- (' + _NICK + ' changed the topic of ' + _CHAN + ' to: (?P<topic>.*)|Topic unset by ' + _NICK2 + ' on ' + _CHAN2 + ')' ),

        ( LogTypes.SELF_NOTICE, _TS + r'\[notice\(' + _CHAN + '\)\] ' + _MSG ),
        ( LogTypes.SELF_NICK, _TS + r'-!- You\'re now known as (?P<target>\S+)' ),

        ( LogTypes.NETSPLIT_START, _TS + r'-!- Netsplit ' + _SRV1 + ' <-> ' + _SRV2 + ' quits: (?P<nick_list>[^(]+)( \(\+(?P<count>\d+) more,\S+\))?'),
        ( LogTypes.NETSPLIT_END, _TS + r'-!- Netsplit over, joins: (?P<nick_list>[^(]+)( \(\+(?P<count>\d+) more\))?' ),

        # pseudo-type, handled internally by parse_line (updates the working date)
        ( 'DAY_CHANGED', r'--- Day changed (?P<date>.+)' ),
    )

    # precompile the table once at class-definition time
    TYPE_REGEXES = [(type, re.compile(expr)) for type, expr in TYPE_EXPRS]

    def parse_line (self, channel, line, date, offset=None) :
        """
        Parse a single line, and return the resulting (date, LogLine) tuple, or None, to ignore the line.

        Uses self.TYPE_REGEXES to do the matching.

        Raises LogParseError if no pattern matches or no timestamp could be parsed.
        """

        # empty line
        if not line :
            return

        # look for match
        match = type = None

        # test each type, in table order
        for type, regex in self.TYPE_REGEXES :
            # attempt to match
            match = regex.match(line)

            # found, break
            if match :
                break

        # no match found?
        if not match :
            raise LogParseError(line, offset, "Line did not match any type")

        # match groups
        # NOTE: groupdict(None) contains an entry for *every* named group of the matched
        # pattern (None where nothing captured), so the `'x' in groups` tests below really
        # test which pattern matched, not whether the group captured anything
        groups = match.groupdict(None)

        # parse timestamp
        if 'datetime' in groups :
            # parse datetime using default asctime() format
            dt = datetime.datetime.strptime(groups['datetime'], '%a %b %d %H:%M:%S %Y')

        elif 'timestamp' in groups :
            # parse timestamp into naive datetime (date defaults to 1900/1/1)
            dt = datetime.datetime.strptime(groups['timestamp'], self.timestamp_fmt)

            # override date?
            if date :
                dt = dt.replace(year=date.year, month=date.month, day=date.day)

        elif 'date' in groups :
            # parse date-only datetime
            dt = datetime.datetime.strptime(groups['date'], '%a %b %d %Y')

        else :
            # no timestamp !?
            raise LogParseError(line, offset, "No timestamp")

        # now localize with timezone
        dtz = self.tz.localize(dt)

        # channel, currently unused
        channel_name = (groups.get('channel') or groups.get('channel2'))

        # source: netsplit lines use the server as the source hostname
        if 'server1' in groups :
            source = (None, None, groups.get('server1'), None)

        else :
            source = (groups.get('nickname') or groups.get('nickname2'), groups.get('username'), groups.get('hostname'), groups.get('flags'))

        # target: netsplit lines use the second server as the target
        if 'server2' in groups :
            target = groups.get('server2')

        else :
            target = groups.get('target')

        # data, from whichever payload group the matched pattern defines
        if 'message' in groups :
            data = groups['message']

        elif 'mode' in groups :
            data = groups['mode']

        elif 'topic' in groups :
            data = groups['topic']

        elif 'nick_list' in groups :
            # split into components
            # NOTE(review): 'list' shadows the builtin within this scope
            list = groups['nick_list'].split(', ')

            # additional count? (the "+N more" suffix becomes a trailing '+N' item)
            if 'count' in groups and groups['count'] :
                list.append('+%d' % int(groups['count']))

            # join back into a space-separated string
            data = ' '.join(list)

        else :
            data = None

        # custom types?
        if type == 'DAY_CHANGED' :
            # new date; no LogLine is produced, only the returned date changes
            date = dtz

        else :
            # build+return (date, LogLine)
            return date, LogLine(channel, offset, type, dtz, source, target, data)

    def parse_lines (self, channel, lines, date=None, starting_offset=None) :
        """
        Parse the given lines, yielding LogEvents.

        Offsets are assigned sequentially from starting_offset (when given); the working
        date is threaded through parse_line so DAY_CHANGED lines update it.
        """

        for offset, line in enumerate(lines) :
            # offset?
            if starting_offset :
                offset = starting_offset + offset

            else :
                offset = None

            # try and parse
            try :
                # get None or (date, line)
                line_info = self.parse_line(channel, line, date, offset)

            # passthrough LogParseError's
            except LogParseError :
                raise

            # wrap other errors as LogParseError
            except Exception, e :
                raise LogParseError(line, offset, "Parsing line failed: %s" % e)

            else :
                # nothing?
                if not line_info :
                    continue

                # unpack, update date
                date, line = line_info

                # yield
                yield line
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/qmsk/irclogs/log_search.py Sun Sep 13 01:15:56 2009 +0300
@@ -0,0 +1,367 @@
+"""
+ Full-text searching of logs
+"""
+
+import datetime, calendar, pytz
+import os.path
+
+import HyperEstraier as hype
+
+import log_line, utils, config
+
class LogSearchError (Exception) :
    """
    General search error; base class for the log-search exceptions
    """
+
class SearchIndexError (LogSearchError) :
    """
    Error manipulating the index
    """

    def __init__ (self, msg, db) :
        """
        Build the error from the given message + HyperEstraier.Database
        """

        # translate the database's current error code into its message text
        db_error = db.err_msg(db.error())

        super(SearchIndexError, self).__init__("%s: %s" % (msg, db_error))
+
class NoResultsFound (LogSearchError) :
    """
    No results found
    """
+
class LogSearchIndex (object) :
    """
    An index on the logs for a group of channels.

    This uses Hyper Estraier to handle searching, whereby each log line is a document (yes, I have a powerful server).

    These log documents have the following attributes:
        @uri                - channel/date/line
        channel             - channel code
        type                - the LogType id
        timestamp           - UTC timestamp
        source_nickname     - source nickname
        source_username     - source username
        source_hostname     - source hostname
        source_chanflags    - source channel flags
        target_nickname     - target nickname

    Each document then has a single line of data, which is the log data message
    """

    def __init__ (self, channels, path, mode='r') :
        """
        Open the database at the given path, with the given mode:
            First char:
                r - read, error if not exists
                w - write, create if not exists
                a - write, error if not exists
                c - create, error if exists

            Additional chars:
                trunc - truncate if exists
                + - read as well as write
                ? - non-blocking lock open, i.e. it fails if already open

        Channels is the ChannelList.

        Raises LogSearchError for a create-mode open of an existing index, and
        SearchIndexError if the underlying database open fails.
        """

        # store
        self.channels = channels
        self.path = path
        self.mode = mode

        # check it does not already exist?
        # XXX: fixed from `mode in 'c'`, which tested mode as a *substring* of 'c' and so
        # never triggered for create modes carrying modifier chars (e.g. 'c?')
        if mode[0] == 'c' and os.path.exists(path) :
            raise LogSearchError("Index already exists: %s" % (path, ))

        # mapping of { mode -> flags }
        mode_to_flag = {
            'r': hype.Database.DBREADER,
            'w': hype.Database.DBWRITER | hype.Database.DBCREAT,
            'a': hype.Database.DBWRITER,
            'c': hype.Database.DBWRITER | hype.Database.DBCREAT,
        }

        # flags to use, standard modes
        flags = mode_to_flag[mode[0]]

        # mode-flags
        # XXX: fixed from an if/elif chain, which could only ever apply *one* of these
        # modifiers even though they are documented as independent additional chars
        if '?' in mode :
            # non-blocking locking
            flags |= hype.Database.DBLCKNB

        if '+' in mode :
            # read as well as write
            flags |= hype.Database.DBREADER

        if 'trunc' in mode :
            # truncate. Dangerous!
            flags |= hype.Database.DBTRUNC

        # make instance
        self.db = hype.Database()

        # open
        if not self.db.open(path, flags) :
            raise SearchIndexError("Index open failed: %s, mode=%s, flags=%#06x" % (path, mode, flags), self.db)

    def close (self) :
        """
        Explicitly close the index, this is done automatically on del
        """

        if not self.db.close() :
            raise SearchIndexError("Index close failed", self.db)

    def insert (self, channel, lines) :
        """
        Adds a sequence of LogLines from the given LogChannel to the index, and return the number of added items
        """

        # count from zero
        count = 0

        # iterate
        for line in lines :
            # insert
            self.insert_line(channel, line)

            # count
            count += 1

        # return
        return count

    def insert_line (self, channel, line) :
        """
        Adds a single LogLine for the given LogChannel to the index.

        The line must carry a non-zero offset and a real (non-1900) timestamp.
        """

        # validate the LogChannel
        assert channel.id

        # validate the LogLine
        assert line.offset
        assert line.timestamp

        # create new document
        doc = hype.Document()

        # line date
        date = line.timestamp.date()

        # ensure that it's not 1900 (the strptime default when no date was known)
        assert date.year != 1900

        # add URI
        doc.add_attr('@uri', "%s/%s/%d" % (channel.id, date.strftime('%Y-%m-%d'), line.offset))

        # add channel id
        doc.add_attr('channel', channel.id)

        # add type
        doc.add_attr('type', str(line.type))

        # add UTC timestamp
        doc.add_attr('timestamp', str(utils.to_utc_timestamp(line.timestamp)))

        # add source attribute?
        if line.source :
            source_nickname, source_username, source_hostname, source_chanflags = line.source

            if source_nickname :
                doc.add_attr('source_nickname', source_nickname.encode('utf8'))

            if source_username :
                doc.add_attr('source_username', source_username.encode('utf8'))

            if source_hostname :
                doc.add_attr('source_hostname', source_hostname.encode('utf8'))

            if source_chanflags :
                doc.add_attr('source_chanflags', source_chanflags.encode('utf8'))

        # add target attributes?
        if line.target :
            target_nickname = line.target

            if target_nickname :
                doc.add_attr('target_nickname', target_nickname.encode('utf8'))

        # add data
        if line.data :
            doc.add_text(line.data.encode('utf8'))

        # put, "clean up dispensable regions of the overwritten document"
        if not self.db.put_doc(doc, hype.Database.PDCLEAN) :
            raise SearchIndexError("put_doc", self.db)

    def search_cond (self, cond) :
        """
        Search using a raw hype.Condition, yielding LogLines.

        Raises NoResultsFound if there aren't any results.
        """

        # execute search, unused 'flags' arg stays zero
        results = self.db.search(cond, 0)

        # no results?
        if not results :
            raise NoResultsFound()

        # iterate over the document IDs
        for doc_id in results :
            # load document, this throws an exception...
            # option constants are hype.Database.GDNOATTR/GDNOTEXT
            doc = self.db.get_doc(doc_id, 0)

            # load the attributes/text
            channel = self.channels.lookup(doc.attr('channel'))
            type = int(doc.attr('type'))
            timestamp = utils.from_utc_timestamp(int(doc.attr('timestamp')))

            # source
            source = (doc.attr('source_nickname'), doc.attr('source_username'), doc.attr('source_hostname'), doc.attr('source_chanflags'))

            # target
            target = doc.attr('target_nickname')

            # message text
            message = doc.cat_texts().decode('utf8')

            # build+yield to as LogLine (offset is not stored, so it stays None)
            yield log_line.LogLine(channel, None, type, timestamp, source, target, message)

    def search (self, options=None, channel=None, attrs=None, phrase=None, order=None, max=None, skip=None) :
        """
        Search with flexible parameters

            options - bitmask of hype.Condition.*
            channel - LogChannel object
            attrs - raw attribute expressions
            phrase - the search query phrase
            order - order attribute expression
            max - number of results to return
            skip - number of results to skip
        """

        # build condition
        cond = hype.Condition()

        if options :
            # set options
            cond.set_options(options)

        if channel :
            # add channel attribute
            cond.add_attr(("channel STREQ %s" % channel.id).encode('utf8'))

        if attrs :
            # add attributes
            for attr in attrs :
                cond.add_attr(attr.encode('utf8'))

        if phrase :
            # add phrase
            cond.set_phrase(phrase.encode('utf8'))

        if order :
            # set order
            cond.set_order(order)

        if max :
            # set max
            cond.set_max(max)

        if skip :
            # set skip
            cond.set_skip(skip)

        # execute
        return self.search_cond(cond)

    def search_simple (self, channel, query, count=None, offset=None, search_msg=True, search_nick=False) :
        """
        Search for lines from the given channel for the given simple query.

        The search_* params define which attributes to search for (using fulltext search for the message, STROR for
        attributes).

        Results are returned in ascending time order.
        """

        # search attributes
        attrs = []

        # nickname target query
        if search_nick :
            attrs.append("source_nickname STRINC %s" % query)
#            attrs.append("target_nickname STRINC %s" % query)

        # use search(), backwards
        results = list(self.search(
            # simplified phrase
            options = hype.Condition.SIMPLE,

            # specific channel
            channel = channel,

            # given phrase
            phrase = query if search_msg else None,

            # attributes defined above
            attrs = attrs,

            # order by timestamp, descending (backwards)
            order = "timestamp NUMD",

            # count/offset
            max = count,
            skip = offset,
        ))

        # reverse back into ascending time order
        return reversed(results)

    def list (self, channel, date, count=None, skip=None) :
        """
        List all indexed log items for the given UTC date
        """

        # start/end dates
        # NOTE(review): end-of-day uses hour=23, minute=23 — presumably meant 23:59; confirm
        dt_start = datetime.datetime(date.year, date.month, date.day, 0, 0, 0, 0)
        dt_end = datetime.datetime(date.year, date.month, date.day, 23, 23, 59, 999999)

        # search
        return self.search(
            # specific channel
            channel = channel,

            # specific date range
            attrs = [
                "timestamp NUMBT %d %d" % (utils.to_utc_timestamp(dt_start), utils.to_utc_timestamp(dt_end))
            ],

            # order correctly
            order = "timestamp NUMA",

            # max count/offset
            max = count,
            skip = skip
        )
+
def get_index () :
    """
    Returns the default read-only index, suitable for searching
    """

    # XXX: no caching, just open it every time
    return LogSearchIndex(config.LOG_CHANNELS, config.SEARCH_INDEX_PATH, 'r')
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/qmsk/irclogs/log_source.py Sun Sep 13 01:15:56 2009 +0300
@@ -0,0 +1,679 @@
+"""
+ A source of IRC log files
+"""
+
+import datetime, calendar, itertools, functools, math
+import os, os.path, errno
+import pytz
+
+import config, utils
+
+# a timedelta that represents one day
+ONE_DAY = datetime.timedelta(days=1)
+
+class LogSourceDecoder (object) :
+ """
+ Handles decoding of LogSource lines
+ """
+
+ def __init__ (self, encoding_list) :
+ """
+ Will try each of the given (charset, errors) items in turn, until one succeeds
+ """
+
+ self.encoding_list = encoding_list
+
+ def decode (self, line) :
+ """
+ Decode the line of str() text into an unicode object
+ """
+
+ # list of errors encountered
+ error_list = []
+
+ # try each in turn
+ for charset, errors in self.encoding_list :
+ # trap UnicodeDecodeError to try with the next one
+ try :
+ return line.decode(charset, errors)
+
+ except UnicodeDecodeError, e :
+ error_list.append("%s:%s - %s" % (charset, errors, e))
+ continue
+
+ # failure
+ raise UnicodeDecodeError("Failed to decode line: %r: %s" % (line, ', '.join(error_list)))
+
class LogSource (object) :
    """
    A collection of IRC logs for a specific target in some format. Provides the possibility to read specific events
    """

    def __init__ (self, decoder, channel=None) :
        """
        The appropriate LogChannel must be given, as we need to be able to construct the LogLines. If it is not yet
        known, then it can be given as None, and set later with bind_channel.

        Uses the given LogSourceDecoder to decode the lines.
        """

        self.channel = channel
        self.decoder = decoder

    def bind_channel (self, channel) :
        """
        Set this source's channel, where None was set before
        """

        # must not already be bound to a channel
        assert not self.channel

        self.channel = channel

    def get_latest (self, count) :
        """
        Yield the latest events, up to `count` of them.
        """

        # abstract method; referencing this undefined name raises NameError if called
        abstract

    def get_date (self, dt) :
        """
        Get logs for the given date (as a datetime).
        """

        abstract

    def get_date_paged (self, dt, count, page=None) :
        """
        Get the logs for a given date (as a datetime), divided into pages of count each. If page is given, the time
        portion of the dt is ignored, and the lines for the given page are returned. Otherwise, if page is None,
        then the lines for the page containing the given timestamp is returned.

        The return value is a (page, max, lines) tuple.
        """

        # how to act?
        if page :
            # known page: skip a constant number of leading lines
            skip = (page - 1) * count

        else :
            skip = None

        # number of the page currently being filled
        this_page = 1

        # last line's timestamp, for bracketing dt between consecutive lines
        last_ts = None

        # found the page containing dt yet?
        found = False

        # count the full number of lines, for max_pages
        line_count = 0

        # collect lines for the current page
        lines = []

        # iterate using get_date
        for line in self.get_date(dt) :
            # count every line, including skipped ones
            line_count += 1

            # skip lines belonging to pages before the requested one?
            if skip :
                skip -= 1
                continue

            # is this page all that we want/need?
            if page or found :
                # already full? keep looping only to finish line_count
                if len(lines) >= count :
                    continue

            # still searching for the page containing the given timestamp
            else :
                # didn't find it in this page?
                if len(lines) >= count :
                    # reset to next page
                    lines = []
                    this_page += 1

                # is dt between these two timestamps?
                if (not last_ts or last_ts <= dt) and (dt <= line.timestamp) :
                    # found!
                    found = True
                    page = this_page

                else :
                    # keep looking
                    last_ts = line.timestamp

            # store line
            lines.append(line)

        # calculate max_pages
        # NOTE(review): if page was None and dt was never bracketed, page is returned as None
        max_pages = math.ceil(float(line_count) / count)

        # return
        return (page, max_pages, lines)

    def get_month_days (self, dt) :
        """
        Return an ordered sequence of dates, telling which days in the given month (as a datetime) have logs available.
        """

        abstract

    def get_modified (self, dt=None, after=None, until=None) :
        """
        Returns a sequence of LogLines that may have been *modified* from their old values since the given datetime.

        If the datetime is not given, *all* lines are returned.

        If after is given, only lines from said date onwards will be returned, regardless of modification.
        If until is given, only lines up to and including said date will be returned, regardless of modification.

        The LogLines should be in time order.
        """

        abstract

    def get_prev_date (self, dt) :
        """
        Get the next distinct date of logs available preceeding the given date, or None
        """

        abstract

    def get_next_date (self, dt) :
        """
        Get the next distinct date of logs following the given date, or None.
        """

        abstract
+
class LogFile (object) :
    """
    A file containing LogEvents

    XXX: modify to implement LogSource?
    """

    def __init__ (self, path, parser, decoder, channel=None, start_date=None, sep='\n') :
        """
        Open the file at the given path, which contains lines as separated by the given separator. Lines are
        decoded using the given LogSourceDecoder, and then parsed using the given parser, using the given date
        as the initial date for this log's first line.

        XXX: currently we assume start_date also for the end of the file
        """

        # store
        self.channel = channel
        self.path = path
        self.parser = parser
        self.start_date = start_date
        self.decoder = decoder
        self.sep = sep

        # open in binary mode; kept open for this object's lifetime
        self.file = open(path, 'rb')

    def __iter__ (self) :
        """
        Yields a series of unicode lines, as read from the top of the file
        """

        # seek to beginning
        self.file.seek(0)

        # iterate over lines, decoding them as well
        return (self.decoder.decode(line.rstrip(self.sep)) for line in self.file)

    def read_full (self) :
        """
        Reads all LogLines. The LogLines will have a valid offset (starting from 1).
        """

        # just use our __iter__
        return self.parser.parse_lines(self.channel, self, self.start_date, starting_offset=1)

    def read_from (self, dt) :
        """
        Reads all LogLines from the given naive timestamp onwards, inclusive
        """

        # start reading at beginning
        events = self.read_full()

        # skip unwanted events
        for event in events :
            if event.timestamp < dt :
                continue

            else :
                # include this line as well
                yield event
                break

        # yield the rest as-is
        for event in events :
            yield event

    def read_until (self, dt) :
        """
        Reads all LogLines up until the given naive timestamp, inclusive
        """

        # start reading events at the beginning
        events = self.read_full()

        # yield events until we hit the given timestamp
        for event in events :
            if event.timestamp <= dt :
                yield event

            else :
                break

        # ignore the rest
        return

    def _read_blocks_reverse (self, blocksize=1024) :
        """
        Yields blocks of file data in reverse order, starting at the end of the file
        """

        # seek to end of file
        self.file.seek(0, os.SEEK_END)

        # read offset
        # XXX: hack -1 to get rid of trailing newline; note this also drops the last
        # char of a file that does *not* end with a separator
        size = offset = self.file.tell() - 1

        # do not try to read past the beginning of the file
        while offset > 0:
            # calc new offset + size
            if offset > blocksize :
                # full block
                offset -= blocksize
                read_size = blocksize

            else :
                # partial block at the start of the file
                read_size = offset
                offset = 0

            # seek to offset
            self.file.seek(offset)

            # read the data we want
            block = self.file.read(read_size)

            # sanity check
            assert len(block) == read_size

            # yield
            yield block

    def _read_lines_reverse (self) :
        """
        Yields decoded lines from the end of the file, in reverse order.
        """

        # partial line left over from the previous (later-in-file) block
        buf = ''

        # read from end of file, a block at a time
        for block in self._read_blocks_reverse() :
            # add in our previous buf
            buf = block + buf

            # split up lines
            lines = buf.split(self.sep)

            # keep the first one as our buffer, as it's incomplete
            buf = lines[0]

            # yield the rest a line at a time in reverse order... this looks weird, but that's how slicing works :)
            # XXX: use something like islice, this has to build a slice object
            for line in lines[:0:-1] :
                yield self.decoder.decode(line)

        # XXX: fixed — once every block has been consumed, the remaining buffer holds the
        # *first* line of the file, complete; it used to be silently dropped, so read_latest
        # could never return the file's first line
        if buf :
            yield self.decoder.decode(buf)

    def read_latest (self, count) :
        """
        Returns up to count events, from the end of the file, or less, if the file doesn't contain that many lines.
        """

        # the list of lines
        lines = []

        # start reading lines into lines
        for line in self._read_lines_reverse() :
            # append
            lines.append(line)

            # done?
            if len(lines) >= count :
                break

        # decode in reverse order, using our starting date....
        # XXX: use lines[::-1] or reversed?
        # XXX: it may make more sense to parse in reverse order, using 'self.end_date' or something like that
        return self.parser.parse_lines(self.channel, reversed(lines), self.start_date)
+
+class LogDirectory (LogSource) :
+    """
+        A directory containing a series of timestamped LogFiles
+
+        Filenames are derived from dates via strftime(filename_fmt), and parsed back via strptime, both in the
+        directory's configured timezone.
+    """
+
+    def __init__ (self, path, tz, parser, decoder, filename_fmt, channel=None) :
+        """
+            Load the logfiles at the given path, which are for the given LogChannel
+
+            Decode the file lines using the given decoder, the files are named according to the date in the given
+            timezone and date format, and will be parsed using the given parser.
+
+            path            - filesystem path of the directory containing the logfiles
+            tz              - pytz timezone used for filename dates and day boundaries
+            parser          - parser used to turn raw lines into events
+            decoder         - charset decoder applied to raw lines
+            filename_fmt    - strftime/strptime format mapping a date to a filename
+            channel         - the LogChannel these logs are for, if known
+        """
+
+        # store
+        self.channel = channel
+        self.path = path
+        self.tz = tz
+        self.parser = parser
+        self.decoder = decoder
+        self.filename_fmt = filename_fmt
+
+    def _get_logfile_date (self, d, load=True, mtime=False, ignore_missing=False) :
+        """
+            Get the logfile corresponding to the given naive date in our timezone.
+
+            If load is False, only test for the presence of the logfile, do not actually open it. If mtime is given,
+            then this returns the file's mtime
+
+            Returns None for a missing logfile when ignore_missing is given as True; otherwise the error propagates.
+        """
+
+        # format filename
+        filename = d.strftime(self.filename_fmt)
+
+        # build path
+        path = os.path.join(self.path, filename)
+
+        try :
+            if load :
+                # open+return the LogFile
+                return LogFile(path, self.parser, self.decoder, start_date=d, channel=self.channel)
+
+            elif mtime :
+                # stat
+                # NOTE(review): utils.mtime raises os.error for a missing file, which the IOError handler below
+                # does not catch under Python 2 — ignore_missing is ineffective on this path; confirm
+                return utils.mtime(path)
+
+            else :
+                # test (os.path.exists never raises for a missing file)
+                return os.path.exists(path)
+
+        # XXX: move to LogFile
+        except IOError, e :
+            # return None for missing files
+            if e.errno == errno.ENOENT and ignore_missing :
+                return None
+
+            else :
+                raise
+
+    def _iter_logfile_dates (self, after=None, until=None, reverse=False) :
+        """
+            Yields a series of naive datetime objects representing the logfiles that are available, in time order.
+
+            Parameters :
+                after       only dates from said date onwards will be returned
+                until       only dates up to and including said date will be returned
+                reverse     the dates are returned in reverse order instead. Note that the meaning of after/until doesn't change
+        """
+
+        # convert timestamps to our timezone's dates
+        if after :
+            after = after.astimezone(self.tz).date()
+
+        if until :
+            until = until.astimezone(self.tz).date()
+
+        # listdir
+        filenames = os.listdir(self.path)
+
+        # sort (lexicographic order; assumes filename_fmt sorts chronologically — TODO confirm)
+        filenames.sort(reverse=reverse)
+
+        # iter files
+        for filename in filenames :
+            try :
+                # parse date
+                dt = self.tz.localize(datetime.datetime.strptime(filename, self.filename_fmt))
+                date = dt.date()
+
+            # XXX: bare except also swallows KeyboardInterrupt/SystemExit; a ValueError catch would suffice
+            except :
+                # ignore files whose names don't match filename_fmt
+                continue
+
+            else :
+                if (after and date < after) or (until and date > until) :
+                    # ignore
+                    continue
+
+                else :
+                    # yield
+                    yield dt
+
+    def _iter_date_reverse (self, dt=None) :
+        """
+            Yields an infinite series of naive date objects in our timezone, iterating backwards in time starting at the
+            given *datetime*, or the the current date, if none given
+        """
+
+        # default to now
+        if not dt :
+            dtz = self.tz.localize(datetime.datetime.now())
+
+        else :
+            # convert to target timezone
+            dtz = dt.astimezone(self.tz)
+
+        # iterate unto infinity
+        while True :
+            # yield
+            yield dtz.date()
+
+            # one day sdrawkcab
+            # NOTE(review): plain timedelta arithmetic on a localized datetime; only the .date() is used, so
+            # DST offsets shouldn't matter here — confirm
+            dtz -= ONE_DAY
+
+    def _iter_logfile_reverse (self, dt=None, max_files=100) :
+        """
+            Yields a series of LogFile objects, iterating backwards in time starting at the given datetime, or the
+            current date, if none given.
+
+            Reads/probes at most max_files files.
+
+            Raises an Exception if no logfile at all was found within the first max_files days.
+        """
+
+        # start counting at zero...
+        file_count = 0
+
+        # have we found any files at all so far?
+        have_found = False
+
+        # iterate backwards over days
+        for day in self._iter_date_reverse(dt) :
+            # stop if we've handled enough files by now
+            if file_count > max_files :
+                break
+
+            # try and open the next logfile
+            logfile = None
+
+            file_count += 1
+            logfile = self._get_logfile_date(day, ignore_missing=True)
+
+            # no logfile there?
+            if not logfile :
+                # hit our limit?
+                if file_count > max_files :
+                    # if we didn't find any logfiles at all, terminate rudely
+                    if not have_found :
+                        raise Exception("No recent logfiles found")
+
+                    else :
+                        # stop looking, deal with what we've got
+                        return
+
+                else :
+                    # skip to next day
+                    continue
+
+            # mark have_found
+            have_found = True
+
+            # yield it
+            yield logfile
+
+    def get_latest (self, count) :
+        """
+            Uses _iter_logfile_reverse to read and yield the given number of lines from as many logfiles as needed
+        """
+
+        # read the events into here
+        lines = []
+
+        # start reading in those logfiles
+        for logfile in self._iter_logfile_reverse() :
+            # read the events, prepending the older file's lines so the result stays in forward time order
+            # XXX: use a queue
+            lines = list(logfile.read_latest(count)) + lines
+
+            # done?
+            if len(lines) >= count :
+                break
+
+        # return the events (may be fewer than count, or slightly more — no truncation here)
+        return lines
+
+    def get_date (self, dt) :
+        """
+            A 'day' is considered to be a 24-hour period from 00:00:00 to 23:59:59. If the timezone of the given
+            datetime differs from our native datetime, this may involve lines from more than one logfile.
+        """
+
+        # begin/end of 24h period, in target timezone
+        # NOTE(review): dtz_begin does not zero out microseconds — presumably harmless; confirm
+        dtz_begin = dt.replace(hour=0, minute=0, second=0).astimezone(self.tz)
+        dtz_end = dt.replace(hour=23, minute=59, second=59, microsecond=999999).astimezone(self.tz)
+
+        # as dates
+        d_begin = dtz_begin.date()
+        d_end = dtz_end.date()
+
+#        print
+#        print "LogDirectory.get_date - %s" % dt
+#        print "\t        %s %s" % (d_begin, dtz_begin)
+#        print "\t-> %s %s" % (d_end, dtz_end)
+
+        # if they're the same, just pull the full log for that date
+        if d_begin == d_end :
+            # open that log
+            logfile = self._get_logfile_date(d_begin)
+
+            # return the full data
+            return logfile.read_full()
+
+        # otherwise, we need to pull two partial logs
+        else :
+            # open both of them, but it's okay if we don't have the second one
+            f_begin = self._get_logfile_date(d_begin)
+            f_end = self._get_logfile_date(d_end, ignore_missing=True)
+
+            # chain together the two sources
+            return itertools.chain(
+                f_begin.read_from(dtz_begin),
+                f_end.read_until(dtz_end) if f_end else []
+            )
+
+    def _iter_month_days (self, month) :
+        """
+            Iterates over the days of a month as dt objects with time=0, localized to the month's tzinfo
+        """
+
+        # there's at most 31 days in a month...
+        for day in xrange(1, 32) :
+            try :
+                # try and build the datetime
+                dt = datetime.datetime(month.year, month.month, day)
+
+            # XXX: bare except — only ValueError (day out of range for month) is expected here
+            except :
+                # stop
+                return
+
+            else :
+                # fix timezones + yield
+                yield month.tzinfo.localize(dt)
+
+    def get_month_days (self, month) :
+        """
+            Yields the dates in the given datetime's month for which logfiles are available
+        """
+
+        # iterate over month's days
+        for dt in self._iter_month_days(month) :
+            # date in our target timezone
+            log_date = dt.astimezone(self.tz).date()
+
+            # test for it
+            if self._get_logfile_date(log_date, load=False, ignore_missing=True) :
+                # valid
+                yield dt.date()
+
+    def get_modified (self, dt=None, after=None, until=None) :
+        """
+            Returns the contents of all logfiles with mtimes past the given date
+        """
+
+        # iterate through all available logfiles in date order, as datetimes, from the given date on
+        for log_date in self._iter_logfile_dates(after, until) :
+            # compare against dt?
+            if dt :
+                # stat
+                mtime = self._get_logfile_date(log_date, load=False, mtime=True, ignore_missing=True)
+
+                # not modified?
+                # NOTE(review): if mtime were None, Python 2 orders None before any datetime, so the file would
+                # be skipped — relies on py2 mixed-type comparison; confirm intent
+                if mtime < dt :
+                    # skip
+                    continue
+
+            # open
+            logfile = self._get_logfile_date(log_date)
+
+            # yield all lines
+            for line in logfile.read_full() :
+                yield line
+
+    def get_prev_date (self, dt) :
+        """
+            Just use _iter_logfile_dates
+        """
+
+        # use for to "iter" once; the for/else fires only when the iterator is empty (no matching logfile)
+        for log_date in self._iter_logfile_dates(until=dt - ONE_DAY, reverse=True) :
+            return log_date
+
+        else :
+            return None
+
+    def get_next_date (self, dt) :
+        """
+            Just use _iter_logfile_dates
+        """
+
+        # use for to "iter" once; the for/else fires only when the iterator is empty (no matching logfile)
+        for log_date in self._iter_logfile_dates(after=dt + ONE_DAY) :
+            return log_date
+
+        else :
+            return None
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/qmsk/irclogs/preferences.py Sun Sep 13 01:15:56 2009 +0300
@@ -0,0 +1,534 @@
+"""
+ Handling user preferences
+"""
+
+import functools
+import Cookie
+
+from qmsk.web import urltree
+import utils
+
+class Preference (urltree.URLType) :
+    """
+        A specific preference
+
+        Subclasses supply the parse()/build() methods of the urltree.URLType interface, set the name/default
+        class attributes below, and may override is_default()/process().
+    """
+
+    # the name to use (also the cookie / query-parameter key)
+    name = None
+
+    # the default value, as from parse()
+    default = None
+
+    def is_default (self, value) :
+        """
+            Returns True if the given post-value is the default value for this preference.
+
+            Defaults to just compare against self.default
+        """
+
+        return (value == self.default)
+
+    def process (self, preferences, value) :
+        """
+            Post-process this preference value. This can access the post-processed values of all other preferences that
+            were defined before this one in the list given to Preferences.
+
+            preferences     - the RequestPreferences object being built
+            value           - the parsed (pre-process) value
+
+            Defaults to just return value.
+        """
+
+        return value
+
+class RequestPreferences (object) :
+    """
+        Represents the specific preferences for some request
+    """
+
+    def __init__ (self, preferences, request, value_map=None) :
+        """
+            Initialize with the given Preferences object, http Request, and { key: value } mapping of raw preference values.
+
+            This will build a mapping of { name: pre-value } using Preference.parse/Preference.default, and then
+            post-process them into the final { name: value } mapping using Preference.process, in strict pref_list
+            order. Note that the process() method will only have access to those preferences processed before it was.
+        """
+
+        # store
+        self.preferences = preferences
+        self.request = request
+
+        # initialize
+        # values:       { name: post-processed value }
+        # set_cookies:  { name: raw cookie string to set, or None to unset } — filled in by set()
+        self.values = {}
+        self.set_cookies = {}
+
+        # initial value map of parsed-but-unprocessed values
+        pre_values = {}
+
+        # load preferences
+        for pref in preferences.pref_list :
+            # got a value for it?
+            if value_map and pref.name in value_map :
+                # get value
+                value = value_map[pref.name]
+
+                # parse it
+                value = pref.parse(value)
+
+            else :
+                # use default value
+                value = pref.default
+
+            # add
+            pre_values[pref.name] = value
+
+        # then post-process using Preferences.process(), in strict pref_list order
+        for pref in preferences.pref_list :
+            # store into self.values, so that pref.get(...) will be able to access the still-incomplete self.values
+            # dict
+            self.values[pref.name] = pref.process(self, pre_values[pref.name])
+
+    def _get_name (self, pref) :
+        """
+            Look up a Preference's name, either by class, object or name.
+        """
+
+        # Preference -> name
+        if isinstance(pref, Preference) :
+            pref = pref.name
+
+        return pref
+
+    def pref (self, name) :
+        """
+            Look up a Preference by object, name
+        """
+
+        # Preference
+        if isinstance(name, Preference) :
+            return name
+
+        # Preference.name
+        elif isinstance(name, basestring) :
+            return self.preferences.pref_map[name]
+
+        # XXX: class? — looking a Preference up by its class is not supported yet
+        else :
+            assert False
+
+    def get (self, pref) :
+        """
+            Return the value for the given Preference, or preference name
+        """
+
+        # look up
+        return self.values[self._get_name(pref)]
+
+    # support dict-access (prefs[pref] / prefs['name'])
+    __getitem__ = get
+
+    def is_default (self, pref) :
+        """
+            Returns True if the given preference is at its default value
+        """
+
+        # determine using Preference.is_default
+        return self.pref(pref).is_default(self.get(pref))
+
+    def build (self, pref) :
+        """
+            Like 'get', but return the raw cookie value
+        """
+
+        # the Preference
+        pref = self.pref(pref)
+
+        # build
+        return pref.build(self.get(pref))
+
+    def parse (self, pref, value=None) :
+        """
+            Parse+process the raw value for some pref into a value object.
+
+            If the given raw value is None, this uses Preference.default
+        """
+
+        # lookup pref
+        pref = self.pref(pref)
+
+        # build value
+        if value is not None :
+            # parse
+            value = pref.parse(value)
+
+        else :
+            # default
+            value = pref.default
+
+        # post-process
+        value = pref.process(self, value)
+
+        # return
+        return value
+
+    def set (self, name, value_obj=None) :
+        """
+            Set a new value for the given preference (by str name).
+
+            If value_obj is None, then the preference cookie is unset
+        """
+
+        # sanity-check to make sure we're not setting it twice...
+        assert name not in self.set_cookies
+
+        # None?
+        if value_obj is not None :
+            # encode using the Preference object
+            value_str = self.preferences.pref_map[name].build(value_obj)
+
+        else :
+            # unset as None
+            value_str = None
+
+        # update in our dict
+        self.values[name] = value_obj
+
+        # add to set_cookies; Preferences.handler turns these into Set-cookie headers on the response
+        self.set_cookies[name] = value_str
+
+class Preferences (object) :
+    """
+        Handle user preferences using cookies
+    """
+
+    def __init__ (self, pref_list) :
+        """
+            Use the given list of Preference objects.
+
+            The ordering of the given pref_list is significant for the process() implementation, as the
+            Preferences are process()'d in order.
+        """
+
+        # store
+        self.pref_list = pref_list
+
+        # translate to mapping as well, for lookup by name
+        self.pref_map = dict((pref.name, pref) for pref in pref_list)
+
+    def load (self, request, ) :
+        """
+            Load the set of preferences for the given request.
+
+            Returns a (cookies, RequestPreferences) tuple, where cookies is the parsed Cookie.SimpleCookie, or
+            None if the request carried no cookies. Query parameters with a preference's name override the
+            cookie value.
+        """
+
+        # the dict of raw { name: value } strings
+        values = {}
+
+        # load the cookies
+        cookie_data = request.env.get('HTTP_COOKIE')
+
+        # got any?
+        if cookie_data :
+            # parse into a SimpleCookie
+            cookies = Cookie.SimpleCookie(cookie_data)
+
+            # update the the values
+            values.update((morsel.key, morsel.value) for morsel in cookies.itervalues())
+
+        else :
+            cookies = None
+
+        # apply any query parameters
+        for pref in self.pref_list :
+            # look for a query param
+            value = request.get_arg(pref.name)
+
+            if value :
+                # override
+                values[pref.name] = value
+
+        # build the RequestPreferences object
+        return cookies, RequestPreferences(self, request, values)
+
+    def handler (self, *pref_list) :
+        """
+            Intended to be used as a decorator for a request handler, this will load the given preferences and pass
+            them to the wrapped handler as keyword arguments, in addition to any others given.
+        """
+
+        def _decorator (func) :
+            @functools.wraps(func)
+            def _handler (request, **args) :
+                # load preferences
+                cookies, prefs = self.load(request)
+
+                # bind to request.prefs
+                # XXX: better way to do this? :/
+                request.prefs = prefs
+
+                # update args with new ones
+                args.update(((pref.name, prefs.get(pref)) for pref in pref_list))
+
+                # handle to get response
+                response = func(request, **args)
+
+                # set cookies?
+                if prefs.set_cookies :
+                    # default, empty, cookiejar
+                    if not cookies :
+                        cookies = Cookie.SimpleCookie('')
+
+                    # update cookies
+                    for key, value in prefs.set_cookies.iteritems() :
+                        if value is None :
+                            # unsetting (expiring) a cookie is not supported
+                            assert False, "Not implemented yet..."
+
+                        else :
+                            # set
+                            # NOTE(review): 'config' is the module-level import further down this file,
+                            # resolved at call time — confirm nothing imports this module partially
+                            cookies[key] = value
+                            cookies[key]["path"] = config.PREF_COOKIE_PATH
+                            cookies[key]["expires"] = config.PREF_COOKIE_EXPIRE_SECONDS
+
+                    # add headers — note this re-sends *all* cookies, not just the changed ones
+                    for morsel in cookies.itervalues() :
+                        response.add_header('Set-cookie', morsel.OutputString())
+
+                return response
+
+            # return wrapped handler
+            return _handler
+
+        # return decorator...
+        return _decorator
+
+# now for our defined preferences....
+import pytz
+import config
+
+class TimeFormat (urltree.URLStringType, Preference) :
+    """
+        Time format (a strftime-style format string; parse/build inherited from urltree.URLStringType)
+    """
+
+    # set name
+    name = 'time_format'
+
+    # default value
+    default = config.PREF_TIME_FMT_DEFAULT
+
+class DateFormat (urltree.URLStringType, Preference) :
+    """
+        Date format (a strftime-style format string; parse/build inherited from urltree.URLStringType)
+    """
+
+    # set name
+    name = 'date_format'
+
+    # default value
+    default = config.PREF_DATE_FMT_DEFAULT
+
+class TimezoneOffset (Preference) :
+    """
+        If the DST-aware 'timezone' is missing, we can fallback to a fixed-offset timezone as detected by
+        Javascript.
+
+        This is read-only, and None by default
+    """
+
+    name = 'timezone_offset'
+    default = None
+
+    def parse (self, offset) :
+        """
+            Offset in minutes -> said minutes, as an int
+
+            Raises ValueError if the raw value is not an integer string.
+        """
+
+        return int(offset)
+
+class Timezone (Preference) :
+    """
+        Timezone
+    """
+
+    # set name
+    name = 'timezone'
+
+    # default is handled via process()
+    default = 'auto'
+
+    # the list of available (value, name) options for use with helpers.select_options
+    # NOTE(review): the zone options use None as their value — presumably the helper falls back to the
+    # name in that case; confirm against helpers.select_options
+    OPTIONS = [('auto', "Autodetect")] + [(None, tz_name) for tz_name in pytz.common_timezones]
+
+    def parse (self, name) :
+        """
+            default -> default
+            tz_name -> pytz.timezone
+        """
+
+        # special-case for 'auto'
+        if name == self.default :
+            return self.default
+
+        else :
+            return pytz.timezone(name)
+
+    def is_default (self, tz) :
+        """
+            True if it's a FixedOffsetTimezone or PREF_TIMEZONE_FALLBACK
+        """
+
+        return (isinstance(tz, utils.FixedOffsetTimezone) or tz == config.PREF_TIMEZONE_FALLBACK)
+
+    def build (self, tz) :
+        """
+            FixedOffsetTimezone -> None
+            pytz.timezone -> tz_name
+        """
+
+        # special-case for auto/no explicit timezone
+        if self.is_default(tz) :
+            # returns the 'auto' marker, not None, despite the docstring above
+            return self.default
+
+        else :
+            # pytz.timezone zone name
+            return tz.zone
+
+    def process (self, prefs, tz) :
+        """
+            If this timezone is given, simply build that. Otherwise, try and use TimezoneOffset, and if that fails,
+            just return the default.
+
+            None -> FixedOffsetTimezone/PREF_TIMEZONE_FALLBACK
+            pytz.timezone -> pytz.timezone
+        """
+
+        # specific timezone set?
+        if tz != self.default :
+            return tz
+
+        # fixed offset?
+        # 'timezone_offset' here is the module-level TimezoneOffset instance defined at the bottom of this file;
+        # it is listed before 'timezone' in the Preferences pref_list, so its value is already processed
+        elif prefs[timezone_offset] is not None :
+            return utils.FixedOffsetTimezone(prefs[timezone_offset])
+
+        # default
+        else :
+            return config.PREF_TIMEZONE_FALLBACK
+
+class ImageFont (Preference) :
+    """
+        Font for ImageFormatter
+    """
+
+    # set name
+    name = 'image_font'
+
+    def __init__ (self, font_dict, default_name) :
+        """
+            Use the given { name: (path, title) } dict and default the given name
+        """
+
+        self.font_dict = font_dict
+
+        # default is the fully-parsed (name, path, title) tuple, not the raw name
+        self.default = self.parse(default_name)
+
+    def parse (self, name) :
+        """
+            name -> (name, path, title)
+
+            Raises KeyError for an unknown font name.
+        """
+
+        path, title = self.font_dict[name]
+
+        return name, path, title
+
+    def build (self, font_info) :
+        """
+            (name, path, title) -> name
+        """
+
+        name, path, title = font_info
+
+        return name
+
+class ImageFontSize (urltree.URLIntegerType, Preference) :
+    """
+        Font size for ImageFormatter (integer; parse/build inherited from urltree.URLIntegerType)
+    """
+
+    # set name, default
+    name = 'image_font_size'
+    default = config.PREF_IMAGE_FONT_SIZE_DEFAULT
+
+    # XXX: constraints for valid values
+
+class Formatter (Preference) :
+    """
+        LogFormatter to use
+    """
+
+    # set name
+    name = 'formatter'
+
+    def __init__ (self, formatters, default) :
+        """
+            Use the given { name -> class LogFormatter } dict and default (a LogFormatter class)
+        """
+
+        self.formatters = formatters
+        self.default = default
+
+    def parse (self, fmt_name) :
+        """
+            fmt_name -> class LogFormatter
+
+            Raises KeyError for an unknown formatter name.
+        """
+
+        return self.formatters[fmt_name]
+
+    def build (self, fmt_cls) :
+        """
+            class LogFormatter -> fmt_name
+        """
+
+        return fmt_cls.name
+
+    def process (self, prefs, fmt_cls) :
+        """
+            class LogFormatter -> LogFormatter(tz, time_fmt, image_font.path)
+
+            Instantiates the formatter class using the already-processed timezone/time_format/image_font*
+            preferences — all of which precede 'formatter' in the Preferences pref_list.
+        """
+
+        # time stuff
+        tz = prefs[timezone]
+        time_fmt = prefs[time_format]
+
+        # font stuff
+        font_name, font_path, font_title = prefs[image_font]
+        font_size = prefs[image_font_size]
+
+        return fmt_cls(tz, time_fmt, font_path, font_size)
+
+class Count (urltree.URLIntegerType, Preference) :
+    """
+        Number of lines of log data to display per page
+    """
+
+    # set name
+    name = "count"
+
+    # default
+    default = config.PREF_COUNT_DEFAULT
+
+    def __init__ (self) :
+        # constrain to a positive integer, capped at the configured maximum
+        super(Count, self).__init__(allow_negative=False, allow_zero=False, max=config.PREF_COUNT_MAX)
+
+# and then build the Preferences object
+# These module-level singletons are referenced by name from Timezone.process/Formatter.process above.
+time_format = TimeFormat()
+date_format = DateFormat()
+timezone_offset = TimezoneOffset()
+timezone = Timezone()
+image_font = ImageFont(config.FORMATTER_IMAGE_FONTS, config.PREF_IMAGE_FONT_DEFAULT)
+image_font_size = ImageFontSize()
+formatter = Formatter(config.LOG_FORMATTERS, config.PREF_FORMATTER_DEFAULT)
+count = Count()
+
+# list order matters: process() for each preference may only read preferences listed before it
+# (timezone needs timezone_offset; formatter needs timezone/time_format/image_font/image_font_size)
+preferences = Preferences([
+    time_format,
+    date_format,
+    timezone_offset,
+    timezone,
+    image_font,
+    image_font_size,
+    formatter,
+    count,
+])
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/qmsk/irclogs/urls.py Sun Sep 13 01:15:56 2009 +0300
@@ -0,0 +1,46 @@
+
+"""
+ URL mapping for the irclogs.qmsk.net site
+"""
+
+# urltree stuff
+from qmsk.web import urltree
+
+# our own handlers
+import handlers
+
+# for types
+import utils
+
+# for configuration
+import config
+
+# our URLTypes
+types = dict(
+ # LogChannel
+ cid = utils.URLChannelName(config.LOG_CHANNELS.dict()),
+
+ # datetime
+ date = utils.URLDateType(config.URL_DATE_FMT),
+
+ # UTC timestamp
+ ts = utils.URLTimestampType(),
+)
+
+# our URLConfig
+urls = url = urltree.URLConfig(type_dict=types)
+
+# urls
+index = url('/', handlers.index )
+preferences = url('/preferences', handlers.preferences_ )
+channel_select = url('/channel_select/?channel:cid', handlers.channel_select )
+channel = url('/channels/{channel:cid}', handlers.channel_last, count=20 )
+channel_last = url('/channels/{channel:cid}/last/{count:int=100}/{type=}', handlers.channel_last )
+channel_link = url('/channels/{channel:cid}/link/{timestamp:ts}/?type=', handlers.channel_link )
+channel_calendar = url('/channels/{channel:cid}/calendar/{year:int=0}/{month:int=0}', handlers.channel_calendar )
+channel_date = url('/channels/{channel:cid}/date/{date:date}/?page:int=1&type=', handlers.channel_date )
+channel_search = url('/channels/{channel:cid}/search/?q=&page:int=1&max:int=1&type=&t:list=', handlers.channel_search )
+
+# mapper
+mapper = urltree.URLTree(urls)
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/qmsk/irclogs/utils.py Sun Sep 13 01:15:56 2009 +0300
@@ -0,0 +1,137 @@
+"""
+ Miscellaneous things
+"""
+
+import datetime, calendar, pytz
+import os, errno
+
+from qmsk.web.urltree import URLType
+
+class URLChannelName (URLType) :
+    """
+        Handle LogChannel names in URLs. Deals with instances of LogChannel
+    """
+
+    def __init__ (self, channels) :
+        """
+            Use the given { name -> LogChannel } dict
+        """
+
+        self.channels = channels
+
+    def parse (self, chan_name) :
+        """
+            chan_name -> LogChannel
+
+            Raises KeyError for an unknown channel name.
+        """
+
+        return self.channels[chan_name]
+
+    def build (self, chan) :
+        """
+            LogChannel -> chan_name
+        """
+
+        return chan.id
+
+class URLDateType (URLType) :
+    """
+        Handle dates in URLs as naive datetime objects (with indeterminate time info)
+    """
+
+    def __init__ (self, date_fmt) :
+        """
+            Format/parse dates using the given strftime/strptime format
+        """
+
+        self.date_fmt = date_fmt
+
+    def parse (self, date_str) :
+        """
+            date_str -> naive datetime.datetime
+
+            Raises ValueError if the string doesn't match date_fmt.
+        """
+
+        return datetime.datetime.strptime(date_str, self.date_fmt)
+
+    def build (self, date) :
+        """
+            datetime.date -> date_str
+        """
+
+        return date.strftime(self.date_fmt)
+
+class URLTimestampType (URLType) :
+    """
+        Handles an integer UNIX timestamp as an UTC datetime
+    """
+
+    def parse (self, timestamp_str) :
+        """
+            timestamp_str -> pytz.utc datetime.datetime
+
+            Raises ValueError if the string is not an integer.
+        """
+
+        return from_utc_timestamp(int(timestamp_str))
+
+    def build (self, dtz) :
+        """
+            pytz.utc datetime.datetime -> timestamp_str
+        """
+
+        return str(to_utc_timestamp(dtz))
+
+def from_utc_timestamp (timestamp) :
+    """
+        Converts a UNIX timestamp (int or float seconds) into a pytz.utc-aware datetime.datetime
+    """
+
+    return datetime.datetime.utcfromtimestamp(timestamp).replace(tzinfo=pytz.utc)
+
+def to_utc_timestamp (dt) :
+    """
+        Converts an aware datetime.datetime into an integer UNIX timestamp
+
+        utctimetuple() normalizes to UTC first, so any timezone-aware datetime works; sub-second precision is lost.
+    """
+
+    return calendar.timegm(dt.utctimetuple())
+
+def mtime (path, ignore_missing=False) :
+    """
+        Gets the mtime for the given path as an UTC datetime, or None, if the file doesn't exist and ignore_missing
+        is given as True. Other stat errors always propagate.
+    """
+
+    try :
+        # stat
+        st = os.stat(path)
+
+    # trap os.error (OSError) from the stat
+    except os.error, e :
+        # ENOENT?
+        if ignore_missing and e.errno == errno.ENOENT :
+            return None
+
+        else :
+            raise
+
+    else :
+        # decode the float st_mtime into an aware UTC datetime
+        return from_utc_timestamp(st.st_mtime)
+
+class FixedOffsetTimezone (pytz._FixedOffset) :
+    """
+        A Fixed-offset timezone with no DST info, compatible with pytz.
+
+        This is based on pytz._FixedOffset, but overrides dst() to return timedelta(0)
+
+        NOTE(review): pytz._FixedOffset is a private pytz API and could change between pytz releases — confirm
+        against the pinned pytz version.
+    """
+
+    def __init__ (self, minutes) :
+        """
+            Minutes is simply the offset from UTC in minutes, positive or negative, at most 24h.
+        """
+
+        pytz._FixedOffset.__init__(self, minutes)
+
+    def dst (self, dt) :
+        """
+            No DST info
+        """
+
+        return datetime.timedelta(0)
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/qmsk/irclogs/version.py Sun Sep 13 01:15:56 2009 +0300
@@ -0,0 +1,106 @@
+"""
+ Figuring out the project version
+
+ Currently this only supports mercurial
+"""
+
+# only load this once
+_VERSION = None
+
+def version_mercurial (path) :
+    """
+        Returns a (branch, tags, parents, modified) tuple for the given repo's working copy
+
+        branch      - local branch name, or None for the 'default' branch
+        tags        - list of tags on the working copy, excluding 'tip'
+        parents     - list of short hex node IDs of the working copy's parents
+        modified    - True if the working copy has local changes
+    """
+
+    global _VERSION
+
+    # cached?
+    # NOTE(review): the cache ignores 'path' — a second call with a different path returns the
+    # first repo's version; fine for a single-repo process, confirm otherwise
+    if _VERSION :
+        return _VERSION
+
+    # code adapted from mercurial.commands.identify
+    from mercurial import ui, hg, encoding
+    from mercurial.node import short
+
+    # open the repo
+    repo = hg.repository(ui.ui(), path)
+
+    # the working copy change context
+    ctx = repo[None]
+
+    # branch
+    branch = encoding.tolocal(ctx.branch())
+
+    # map default -> None
+    if branch == 'default' :
+        branch = None
+
+    # list of tags, without 'tip' tag
+    tags = [tag for tag in ctx.tags() if tag != 'tip']
+
+    # ctx's parents
+    parents = [short(p.node()) for p in ctx.parents()]
+
+    # local modifications?
+    modified = bool(ctx.files() + ctx.deleted())
+
+    # done
+    _VERSION = (branch, tags, parents, modified)
+    return _VERSION
+
+def version_string (path='.') :
+    """
+        Return a version string representing the version of the software at the given path.
+
+        Currently, this assumes that the given path points to a local Mercurial repo.
+    """
+
+    try :
+        # get info
+        branch, tags, parents, modified = version_mercurial(path)
+
+    except :
+        # XXX: ignore — this re-raise makes the try/except a no-op placeholder for future fallback handling
+        raise
+
+    # tags: <tag> [ "-" <tag> [ ... ]]
+    if tags :
+        return '-'.join(tags)
+
+    # revision: <parent> [ "+" <parent> [ ... ]] [ "+" ]
+    # trailing "+" marks local modifications
+    revision = '+'.join(p for p in parents) + ('+' if modified else '')
+
+    if branch :
+        # branch: "(" <branch> ")" <revision>
+        return '(%s)%s' % (branch, revision)
+
+    else :
+        # plain: <revision>
+        return revision
+
+def version_link_hg (hgweb_url, path='.') :
+    """
+        Returns an HTML snippet linking each revision component to its hgweb page for this version
+
+        hgweb_url   - base URL of the hgweb instance; "/rev/<rev>" is appended per revision
+        path        - local repo path, passed to version_mercurial
+    """
+
+    # URL for revision ID (tag/branch names are used as-is in the /rev/ URL, not resolved to hashes)
+    rev_url = lambda rev: '<a href="%(url)s/rev/%(rev)s">%(rev)s</a>' % dict(url=hgweb_url, rev=rev)
+
+    # get info
+    branch, tags, parents, modified = version_mercurial(path)
+
+    # tags: <tag> [ "-" <tag> [ ... ]] [ "+" ]
+    if tags :
+        return '-'.join(rev_url(tag) for tag in tags) + ('+' if modified else '')
+
+    # revision: <parent> [ "+" <parent> [ ... ]] [ "+" ]
+    revision = '+'.join(rev_url(p) for p in parents) + ('+' if modified else '')
+
+    if branch :
+        # branch: "(" <branch> ")" <revision> [ "+" ]
+        return '(%s)%s' % (rev_url(branch), revision) + ('+' if modified else '')
+
+    else :
+        # plain: <revision>
+        return revision
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/qmsk/irclogs/wsgi.py Sun Sep 13 01:15:56 2009 +0300
@@ -0,0 +1,31 @@
+"""
+ Our custom WSGI application
+"""
+
+from qmsk.web import wsgi
+
+import urls, error
+
+# our custom app with custom error() method
+class Application (wsgi.Application) :
+    """
+        WSGI application for the irclogs site: wsgi.Application wired up with our URL mapper and a custom
+        error page.
+    """
+
+    def __init__ (self) :
+        """
+            Construct wsgi.Application with our URLMapper
+        """
+
+        super(Application, self).__init__(urls.mapper)
+
+    def handle_error (self, exc_info, env, start_response) :
+        """
+            Use error.build_error and return that
+
+            Returns the error page body as the WSGI response iterable.
+        """
+
+        # get info
+        status, content_type, body = error.build_error(env=env)
+
+        # headers
+        start_response(status, [('Content-type', content_type)], exc_info)
+
+        # body
+        # NOTE(review): returning a bare str makes WSGI iterate it char-by-char; wrapping in a list would be
+        # more efficient — confirm qmsk.web doesn't already wrap it
+        return body
+
--- a/scripts/search-index Sun Sep 13 00:49:55 2009 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,640 +0,0 @@
-#!/usr/bin/env python2.5
-
-"""
- Tool for accessing the search index
-"""
-
-# XXX: fix path
-import sys; sys.path.insert(0, '.'); sys.path.insert(0, '..')
-
-import os, os.path, fcntl
-import datetime, pytz
-import optparse
-
-# configuration and the LogSearchIndex module
-import config, utils, log_search, channels
-
-def _open_index (options, open_mode) :
- """
- Opens the LogSearchIndex
- """
-
- return log_search.LogSearchIndex(config.LOG_CHANNELS, options.index_path, open_mode)
-
-
-def _open_index_and_channel (options, channel_name, open_mode) :
- """
- Opens+returns a LogSearchIndex and a LogChannel
- """
-
- # open the LogSearchIndex
- index = _open_index(options, open_mode)
-
- # open the channel
- channel = config.LOG_CHANNELS.lookup(channel_name)
-
- # return
- return index, channel
-
-def _iter_insert_stats (index, channel, lines) :
- """
- Insert the given lines into the index.
-
- Assumes the lines will be in time-order, and yields a series of (date, count) tuples for every date that lines
- are inserted for
- """
-
- # last date
- date = None
-
- # count
- count = 0
-
- # iter lines
- for line in lines :
- # next day?
- if not date or line.timestamp.date() != date :
- if date :
- # yield stats
- yield date, count
-
- # reset count
- count = 0
-
- # timestamp's date
- date = line.timestamp.date()
-
- # insert
- index.insert_line(channel, line)
-
- # count
- count += 1
-
- # final count?
- if date and count :
- yield date, count
-
-def _insert_lines (index, options, channel, lines) :
- """
- Insert the given lines into the index.
-
- Assumes the lines will be in time-order, and prints out as status messages the date and count for the inserted lines
- """
-
- # iterate insert stats
- for date, count in _iter_insert_stats(index, channel, lines) :
- # output date header?
- if not options.quiet :
- print "%s: %s" % (date.strftime('%Y-%m-%d'), count),
-
-def _load_channel_date (index, options, channel, date) :
- """
- Loads the logs for the given date from the channel's LogSource into the given LogSearchIndex
- """
-
- if not options.quiet :
- print "Loading date for channel %s" % channel.id
-
- try :
- # load lines for date
- lines = channel.source.get_date(date)
-
- except Exception, e :
- if not options.skip_missing :
- raise
-
- if not options.quiet :
- print "\tSkipped: %s" % (e, )
-
- else :
- # insert
- _insert_lines(index, options, channel, lines)
-
-def _parse_date (options, date_str, tz=None, fmt='%Y-%m-%d') :
- """
- Parse the given datetime, using the given timezone(defaults to options.tz) and format
- """
-
- # default tz
- if not tz :
- tz = options.timezone
-
- try :
- # parse
- return datetime.datetime.strptime(date_str, fmt).replace(tzinfo=tz)
-
- except Exception, e :
- raise CommandError("[ERROR] Invalid date: %s: %s" % (date_str, e))
-
-def _output_lines (options, lines) :
- """
- Display the formatted LogLines
- """
-
- # display as plaintext
- for line, txt_data in options.formatter.format_txt(lines, full_timestamps=True) :
- print txt_data
-
-class CommandError (Exception) :
- """
- Error with command-line arguments
- """
-
- pass
-
-def cmd_create (options) :
- """
- Creates a new index
- """
-
- # open index
- index = _open_index(options, 'ctrunc' if options.force else 'c')
-
- # that's all
- pass
-
-def cmd_load (options, channel_name, *dates) :
- """
- Loads the logs for a specific channel for the given dates (in terms of the channe logs' timezone) into the index
- """
-
- # open index/channel
- index, channel = _open_index_and_channel(options, channel_name, 'c' if options.create else 'a')
-
- # handle each date
- for date_str in dates :
- # prase date
- try :
- date = _parse_date(options, date_str, channel.source.tz)
-
- # handle errors
- except CommandError, e :
- if options.skip_missing :
- print "[ERROR] %s" % (date_name, e)
-
- else :
- raise
-
- # otherwise, load
- else :
- _load_channel_date(index, options, channel, date)
-
-def cmd_load_month (options, channel_name, *months) :
- """
- Loads the logs for a specific channel for the given months (in terms of the channel's timezone) into the index
- """
-
- # open index/channel
- index, channel = _open_index_and_channel(options, channel_name, 'c' if options.create else 'a')
-
- # handle each date
- for month_str in months :
- # prase date
- try :
- month = _parse_date(options, month_str, channel.source.tz, '%Y-%m')
-
- # handle errors
- except CommandError, e :
- # skip?
- if options.skip_missing :
- if not options.quiet :
- print "[ERROR] %s" % (date_name, e)
- continue
-
- else :
- raise
-
- # get the set of days
- days = list(channel.source.get_month_days(month))
-
- if not options.quiet :
- print "Loading %d days of logs:" % (len(days))
-
- # load each day
- for date in days :
- # convert to datetime
- dt = datetime.datetime.combine(date, datetime.time(0)).replace(tzinfo=channel.source.tz)
-
- # load
- _load_channel_date(index, options, channel, dt)
-
-def cmd_search (options, channel_name, query) :
- """
- Search the index for events on a specific channel with the given query
- """
-
- # sanity-check
- if options.create :
- raise Exception("--create doesn't make sense for 'search'")
-
- # open index/channel
- index, channel = _open_index_and_channel(options, channel_name, 'r')
-
- # search
- lines = index.search_simple(channel, query)
-
- # display
- _output_lines(options, lines)
-
-def cmd_list (options, channel_name, *dates) :
- """
- List the indexed events for a specific date
- """
-
- # sanity-check
- if options.create :
- raise Exception("--create doesn't make sense for 'search'")
-
- # open index/channel
- index, channel = _open_index_and_channel(options, channel_name, 'r')
-
- # ...for each date
- for date_str in dates :
- # parse date
- date = _parse_date(options, date_str)
-
- # list
- lines = index.list(channel, date)
-
- # display
- _output_lines(options, lines)
-
-def _autoload_reset (options, channels) :
- """
- Reset old autoload state
- """
-
- # warn
- if not options.quiet :
- print "[WARN] Resetting autoload state for: %s" % ', '.join(channel.id for channel in channels)
-
- # iter
- for channel in channels :
- # statefile path
- statefile_path = os.path.join(options.autoload_state_path, 'chan-%s' % channel.id)
-
- # is it present?
- if not os.path.exists(statefile_path) :
- if not options.quiet :
- print "[WARN] No statefile found at %s" % statefile_path
-
- else :
- if not options.quiet :
- print "\t%s: " % channel.id,
-
- # remove the statefile
- os.remove(statefile_path)
-
- if not options.quiet :
- print "OK"
-
-def cmd_autoload (options, *channel_names) :
- """
- Automatically loads all channel logs that have not been indexed yet (by logfile mtime)
- """
-
- # open index, nonblocking
- index = _open_index(options, 'c?' if options.create else 'a?')
-
- # default to all channels
- if not channel_names :
- channels = config.LOG_CHANNELS
-
- else :
- channels = [config.LOG_CHANNELS.lookup(channel_name) for channel_name in channel_names]
-
- # reset autoload state?
- if options.reset :
- _autoload_reset(options, channels)
- if not options.quiet :
- print
-
- # iterate channels
- for channel in channels :
- if not options.quiet :
- print "Channel %s:" % channel.id
-
- # no 'from' by default
- after = None
-
- # path to our state file
- statefile_path = os.path.join(options.autoload_state_path, 'chan-%s' % channel.id)
- statefile_tmppath = statefile_path + '.tmp'
-
- # does it exist?
- have_tmpfile = os.path.exists(statefile_tmppath)
-
- # do we have a tempfile from a previous crash?
- if have_tmpfile and not options.ignore_resume :
- # first, open it...
- statefile_tmp = open(statefile_tmppath, 'r+')
-
- # ... then lock it
- fcntl.lockf(statefile_tmp, fcntl.LOCK_EX | fcntl.LOCK_NB)
-
- # read after timestamp
- after_str = statefile_tmp.read().rstrip()
-
- if after_str :
- # parse timestamp
- after = utils.from_utc_timestamp(int(after_str))
-
- if not options.quiet :
- print "\tContinuing earlier progress from %s" % after
-
- else :
- # ignore
- if not options.quiet :
- print "\t[WARN] Ignoring empty temporary statefile"
-
- else :
- # warn about old tmpfile that was ignored
- if have_tmpfile and not options.quiet :
- print "\t[WARN] Ignoring old tmpfile state"
-
- # open new tempfile
- statefile_tmp = open(statefile_tmppath, 'w')
-
- # lock
- fcntl.lockf(statefile_tmp, fcntl.LOCK_EX | fcntl.LOCK_NB)
-
- # override?
- if options.reload :
- # load all
- mtime = None
-
- if not options.quiet :
- print "\tForcing reload!"
-
- # stat for mtime
- else :
- # stat for mtime, None if unknown
- mtime = utils.mtime(statefile_path, ignore_missing=True)
-
- if mtime and not options.quiet :
- print "\tLast load time was %s" % mtime
-
- elif not options.quiet :
- print "\t[WARN] No previous load state! Loading full logs"
-
- # only after some specific date?
- if options.after :
- # use unless read from tempfile
- if not after :
- after = options.after
-
- if not options.quiet :
- print "\tOnly including dates from %s onwards" % after
-
- else :
- if not options.quiet :
- print "\t[WARN] Ignoring --from because we found a tempfile"
-
- # only up to some specific date?
- if options.until :
- until = options.until
-
- if not options.quiet :
- print "\tOnly including dates up to (and including) %s" % until
- else :
- # default to now
- until = None
-
- # get lines
- lines = channel.source.get_modified(mtime, after, until)
-
- # insert
- if not options.quiet :
- print "\tLoading and inserting..."
- print
-
- # iterate insert() per day to display info and update progress
- for date, count in _iter_insert_stats(index, channel, lines) :
- # output date header?
- if not options.quiet :
- print "\t%10s: %d" % (date.strftime('%Y-%m-%d'), count)
-
- # write temp state
- statefile_tmp.seek(0)
- statefile_tmp.write(str(utils.to_utc_timestamp(datetime.datetime.combine(date, datetime.time(0)))))
- statefile_tmp.flush()
-
- # write autoload state
- open(statefile_path, 'w').close()
-
- # close+delete tempfile
- statefile_tmp.close()
- os.remove(statefile_tmppath)
-
- if not options.quiet :
- print
-
- # done
- return
-
-def cmd_help (options, *args) :
- """
- Help about commands
- """
-
- import inspect
-
- # general help stuff
- options._parser.print_help()
-
- # specific command?
- if args :
- # the command name
- command, = args
-
- # XXX: display info about specific command
- xxx
-
- # general
- else :
- print
- print "Available commands:"
-
- # build list of all cmd_* objects
- cmd_objects = [(name, obj) for name, obj in globals().iteritems() if name.startswith('cmd_') and inspect.isfunction(obj)]
-
- # sort alphabetically
- cmd_objects.sort()
-
- # iterate through all cmd_* objects
- for cmd_func_name, cmd_func in cmd_objects :
- # remove cmd_ prefix
- cmd_name = cmd_func_name[4:]
-
- # inspect
- cmd_args, cmd_varargs, cmd_varkw, cmd_default = inspect.getargspec(cmd_func)
- cmd_doc = inspect.getdoc(cmd_func)
-
- # remove the "options" arg
- cmd_args = cmd_args[1:]
-
- # display
- print "\t%10s %-30s : %s" % (cmd_name, inspect.formatargspec(cmd_args, cmd_varargs, None, cmd_default), cmd_doc)
-
-class MyOption (optparse.Option) :
- """
- Our custom types for optparse
- """
-
- def check_date (option, opt, value) :
- """
- Parse a date
- """
-
- try :
- # parse
- return datetime.datetime.strptime(value, '%Y-%m-%d')
-
- # trap -> OptionValueError
- except Exception, e :
- raise optparse.OptionValueError("option %s: invalid date value: %r" % (opt, value))
-
- def check_timezone (option, opt, value) :
- """
- Parse a timezone
- """
-
- try :
- # parse
- return pytz.timezone(value)
-
- # trap -> OptionValueError
- except Exception, e :
- raise optparse.OptionValueError("option %s: invalid timezone: %r" % (opt, value))
-
- def take_action (self, action, dest, opt, value, values, parser) :
- """
- Override take_action to handle date
- """
-
- if action == "parse_date" :
- # get timezone
- tz = values.timezone
-
- # set timezone
- value = value.replace(tzinfo=tz)
-
- # store
- return optparse.Option.take_action(self, 'store', dest, opt, value, values, parser)
-
- else :
- # default
- return optparse.Option.take_action(self, action, dest, opt, value, values, parser)
-
- TYPES = optparse.Option.TYPES + ('date', 'timezone')
- TYPE_CHECKER = optparse.Option.TYPE_CHECKER.copy()
- TYPE_CHECKER['date'] = check_date
- TYPE_CHECKER['timezone'] = check_timezone
- ACTIONS = optparse.Option.ACTIONS + ('parse_date', )
- STORE_ACTIONS = optparse.Option.STORE_ACTIONS + ('parse_date', )
- TYPED_ACTIONS = optparse.Option.TYPED_ACTIONS + ('parse_date', )
- ACTIONS = optparse.Option.ACTIONS + ('parse_date', )
-
-def main (argv) :
- """
- Command-line main, with given argv
- """
-
- # define parser
- parser = optparse.OptionParser(
- usage = "%prog [options] <command> [ ... ]",
- add_help_option = False,
- option_class = MyOption,
- )
-
- # general options # # # #
- general = optparse.OptionGroup(parser, "General Options")
- general.add_option('-h', "--help", dest="help", help="Show this help message and exit",
- action="store_true" )
-
- general.add_option( "--formatter", dest="formatter_name", help="LogFormatter to use",
- metavar="FMT", type="choice", default=config.PREF_FORMATTER_DEFAULT.name,
- choices=[fmt_name for fmt_name in config.LOG_FORMATTERS.iterkeys()] )
-
- general.add_option( "--index", dest="index_path", help="Index database path",
- metavar="PATH", default=config.SEARCH_INDEX_PATH )
-
- general.add_option( "--timezone", dest="timezone", help="Timezone for output",
- metavar="TZ", type="timezone", default=pytz.utc )
-
- general.add_option( "--force", dest="force", help="Force dangerous operation",
- action="store_true" )
-
- general.add_option( "--quiet", dest="quiet", help="Supress status messages",
- action="store_true" )
- parser.add_option_group(general)
-
-
- # cmd_load options # # # #
- load = optparse.OptionGroup(parser, "Load Options")
- load.add_option( "--skip-missing", dest="skip_missing", help="Skip missing logfiles",
- action="store_true" )
-
- load.add_option( "--create", dest="create", help="Create index database",
- action="store_true" )
- parser.add_option_group(load)
-
-
- # cmd_autoload options # # # #
- autoload = optparse.OptionGroup(parser, "Autoload Options")
- autoload.add_option( "--autoload-state", dest="autoload_state_path", help="Path to autoload state dir",
- metavar="PATH", default=config.SEARCH_AUTOINDEX_PATH)
-
- autoload.add_option( "--from", dest="after", help="Only autoload logfiles from the given date on",
- metavar="DATE", type="date", action="parse_date", default=None )
-
- autoload.add_option( "--until", dest="until", help="Only autoload logfiles up to (and including) the given date",
- metavar="DATE", type="date", action="parse_date", default=None )
-
- autoload.add_option( "--reload", dest="reload", help="Force reload lines",
- action="store_true" )
-
- autoload.add_option( "--reset", dest="reset", help="Reset old autload state",
- action="store_true" )
-
- autoload.add_option( "--ignore-resume", dest="ignore_resume", help="Do not try and resume interrupted autoload",
- action="store_true" )
- parser.add_option_group(autoload)
-
- # parse
- options, args = parser.parse_args(argv[1:])
-
- # postprocess stuff
- options._parser = parser
- options.formatter = config.LOG_FORMATTERS[options.formatter_name](options.timezone, "%H:%M:%S", None, None)
-
- # special-case --help
- if options.help :
- return cmd_help(options, *args)
-
- # must have at least the command argument
- if not args :
- raise CommandError("Missing command")
-
- # pop command
- command = args.pop(0)
-
- # get func
- func = globals().get('cmd_%s' % command)
-
- # unknown command?
- if not func :
- raise CommandError("Unknown command: %s" % command)
-
- # call
- func(options, *args)
-
-if __name__ == '__main__' :
- try :
- main(sys.argv)
- sys.exit(0)
-
- except CommandError, e :
- print e
- sys.exit(1)
-
--- a/urls.py Sun Sep 13 00:49:55 2009 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,46 +0,0 @@
-
-"""
- URL mapping for the irclogs.qmsk.net site
-"""
-
-# urltree stuff
-from qmsk.web import urltree
-
-# our own handlers
-import handlers
-
-# for types
-import utils
-
-# for configuration
-import config
-
-# our URLTypes
-types = dict(
- # LogChannel
- cid = utils.URLChannelName(config.LOG_CHANNELS.dict()),
-
- # datetime
- date = utils.URLDateType(config.URL_DATE_FMT),
-
- # UTC timestamp
- ts = utils.URLTimestampType(),
-)
-
-# our URLConfig
-urls = url = urltree.URLConfig(type_dict=types)
-
-# urls
-index = url('/', handlers.index )
-preferences = url('/preferences', handlers.preferences_ )
-channel_select = url('/channel_select/?channel:cid', handlers.channel_select )
-channel = url('/channels/{channel:cid}', handlers.channel_last, count=20 )
-channel_last = url('/channels/{channel:cid}/last/{count:int=100}/{type=}', handlers.channel_last )
-channel_link = url('/channels/{channel:cid}/link/{timestamp:ts}/?type=', handlers.channel_link )
-channel_calendar = url('/channels/{channel:cid}/calendar/{year:int=0}/{month:int=0}', handlers.channel_calendar )
-channel_date = url('/channels/{channel:cid}/date/{date:date}/?page:int=1&type=', handlers.channel_date )
-channel_search = url('/channels/{channel:cid}/search/?q=&page:int=1&max:int=1&type=&t:list=', handlers.channel_search )
-
-# mapper
-mapper = urltree.URLTree(urls)
-
--- a/utils.py Sun Sep 13 00:49:55 2009 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,137 +0,0 @@
-"""
- Miscellaneous things
-"""
-
-import datetime, calendar, pytz
-import os, errno
-
-from qmsk.web.urltree import URLType
-
-class URLChannelName (URLType) :
- """
- Handle LogChannel names in URLs. Deals with instances of LogChannel
- """
-
- def __init__ (self, channels) :
- """
- Use the given { name -> LogChannel } dict
- """
-
- self.channels = channels
-
- def parse (self, chan_name) :
- """
- chan_name -> LogChannel
- """
-
- return self.channels[chan_name]
-
- def build (self, chan) :
- """
- LogChannel -> chan_name
- """
-
- return chan.id
-
-class URLDateType (URLType) :
- """
- Handle dates in URLs as naive datetime objects (with indeterminate time info)
- """
-
- def __init__ (self, date_fmt) :
- """
- Format/parse dates using the given format
- """
-
- self.date_fmt = date_fmt
-
- def parse (self, date_str) :
- """
- date_str -> naive datetime.datetime
- """
-
- return datetime.datetime.strptime(date_str, self.date_fmt)
-
- def build (self, date) :
- """
- datetime.date -> date_str
- """
-
- return date.strftime(self.date_fmt)
-
-class URLTimestampType (URLType) :
- """
- Handles an integer UNIX timestamp as an UTC datetime
- """
-
- def parse (self, timestamp_str) :
- """
- timestamp_str -> pytz.utc datetime.datetime
- """
-
- return from_utc_timestamp(int(timestamp_str))
-
- def build (self, dtz) :
- """
- pytz.utc datetime.datetime -> timestamp_str
- """
-
- return str(to_utc_timestamp(dtz))
-
-def from_utc_timestamp (timestamp) :
- """
- Converts a UNIX timestamp into a datetime.datetime
- """
-
- return datetime.datetime.utcfromtimestamp(timestamp).replace(tzinfo=pytz.utc)
-
-def to_utc_timestamp (dt) :
- """
- Converts a datetime.datetime into a UNIX timestamp
- """
-
- return calendar.timegm(dt.utctimetuple())
-
-def mtime (path, ignore_missing=False) :
- """
- Gets the mtime for the given path as an UTC datetime, or None, if the file doesn't exist and ignore_missing
- """
-
- try :
- # stat
- st = os.stat(path)
-
- # trap IOError
- except os.error, e :
- # ENOENT?
- if ignore_missing and e.errno == errno.ENOENT :
- return None
-
- else :
- raise
-
- else :
- # decode
- return from_utc_timestamp(st.st_mtime)
-
-class FixedOffsetTimezone (pytz._FixedOffset) :
- """
- A Fixed-offset timezone with no DST info, compatible with pytz.
-
- This is based on pytz._FixedOffset, but overrides dst() to return timedelta(0)
- """
-
- def __init__ (self, minutes) :
- """
- Minutes is simply the offset from UTC in minutes, positive or negative, at most 24h.
- """
-
- pytz._FixedOffset.__init__(self, minutes)
-
- def dst (self, dt) :
- """
- No DST info
- """
-
- return datetime.timedelta(0)
-
--- a/version.py Sun Sep 13 00:49:55 2009 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,106 +0,0 @@
-"""
- Figuring out the project version
-
- Currently this only supports mercurial
-"""
-
-# only load this once
-_VERSION = None
-
-def version_mercurial (path) :
- """
- Returns a (branch, tags, parents, modified) tuple for the given repo's working copy
- """
-
- global _VERSION
-
- # cached?
- if _VERSION :
- return _VERSION
-
- # code adapted from mercurial.commands.identify
- from mercurial import ui, hg, encoding
- from mercurial.node import short
-
- # open the repo
- repo = hg.repository(ui.ui(), path)
-
- # the working copy change context
- ctx = repo[None]
-
- # branch
- branch = encoding.tolocal(ctx.branch())
-
- # map default -> None
- if branch == 'default' :
- branch = None
-
- # list of tags, without 'tip' tag
- tags = [tag for tag in ctx.tags() if tag != 'tip']
-
- # ctx's parents
- parents = [short(p.node()) for p in ctx.parents()]
-
- # local modifications?
- modified = bool(ctx.files() + ctx.deleted())
-
- # done
- _VERSION = (branch, tags, parents, modified)
- return _VERSION
-
-def version_string (path='.') :
- """
- Return a version string representing the version of the software at the given path.
-
- Currently, this assumes that the given path points to a local Mercurial repo.
- """
-
- try :
- # get info
- branch, tags, parents, modified = version_mercurial(path)
-
- except :
- # XXX: ignore
- raise
-
- # tags: <tag> [ "-" <tag> [ ... ]]
- if tags :
- return '-'.join(tags)
-
- # revision: <parent> [ "+" <parent> [ ... ]] [ "+" ]
- revision = '+'.join(p for p in parents) + ('+' if modified else '')
-
- if branch :
- # branch: "(" <branch> ")" <revision>
- return '(%s)%s' % (branch, revision)
-
- else :
- # plain: <revision>
- return revision
-
-def version_link_hg (hgweb_url, path='.') :
- """
- Returns a link to a hgweb page for this version
- """
-
- # URL for revision ID
- rev_url = lambda rev: '<a href="%(url)s/rev/%(rev)s">%(rev)s</a>' % dict(url=hgweb_url, rev=rev)
-
- # get info
- branch, tags, parents, modified = version_mercurial(path)
-
- # tags: <tag> [ "-" <tag> [ ... ]] [ "+" ]
- if tags :
- return '-'.join(rev_url(tag) for tag in tags) + ('+' if modified else '')
-
- # revision: <parent> [ "+" <parent> [ ... ]] [ "+" ]
- revision = '+'.join(rev_url(p) for p in parents) + ('+' if modified else '')
-
- if branch :
- # branch: "(" <branch> ")" <revision> [ "+" ]
- return '(%s)%s' % (rev_url(branch), revision) + ('+' if modified else '')
-
- else :
- # plain: <revision>
- return revision
-
--- a/wsgi.py Sun Sep 13 00:49:55 2009 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,31 +0,0 @@
-"""
- Our custom WSGI application
-"""
-
-from qmsk.web import wsgi
-
-import urls, error
-
-# our custom app with custom error() method
-class Application (wsgi.Application) :
- def __init__ (self) :
- """
- Construct wsgi.Application with our URLMapper
- """
-
- super(Application, self).__init__(urls.mapper)
-
- def handle_error (self, exc_info, env, start_response) :
- """
- Use error.build_error and return that
- """
-
- # get info
- status, content_type, body = error.build_error(env=env)
-
- # headers
- start_response(status, [('Content-type', content_type)], exc_info)
-
- # body
- return body
-