# HG changeset patch # User Tero Marttila # Date 1252793756 -10800 # Node ID 6db2527b67cf6af73ef3dd17f265216459d5a4ce # Parent 9c77698501954b7a44204583b86d14335afaad9b restructure into package format - the qmsk.* stuff doesn't work so well though, requires a symlink for qmsk.web to work... diff -r 9c7769850195 -r 6db2527b67cf __init__.py --- a/__init__.py Sun Sep 13 00:49:55 2009 +0300 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,10 +0,0 @@ -""" - The irclogs.qmsk.net site is an IRC log browser -""" - -# the URL mapper -import urls - -# our RequestHandler -handler = urls.mapper - diff -r 9c7769850195 -r 6db2527b67cf bin/index.cgi --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bin/index.cgi Sun Sep 13 01:15:56 2009 +0300 @@ -0,0 +1,49 @@ +#!/usr/bin/python2.5 + +""" + CGI mode using qmsk.web.cgi +""" + +def error () : + """ + Dumps out a raw traceback of the current exception to stdout, call from except. + + Used for low-level ImportError's + """ + + import sys + + # if this import fails, we're doomed + from qmsk.irclogs import error + + # format info + status, content_type, body = error.build_error() + + # HTTP headers+body + sys.stdout.write('Status: %s\r\n' % status) + sys.stdout.write('Content-type: %s\r\n' % content_type) + sys.stdout.write('\r\n') + sys.stdout.write(body) + +def main () : + """ + Build our wsgi.Application and run + """ + + try : + from qmsk.web import cgi_main + from qmsk.irclogs import wsgi + + # create app + app = wsgi.Application() + + # run once + cgi_main.run(app) + + except : + # display error on stdout + error() + +if __name__ == '__main__' : + main() + diff -r 9c7769850195 -r 6db2527b67cf bin/index.fcgi --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bin/index.fcgi Sun Sep 13 01:15:56 2009 +0300 @@ -0,0 +1,26 @@ +#!/usr/bin/python2.5 +# :set filetype=py + +""" + FastCGI mode using qmsk.web.fastcgi_main +""" + +from qmsk.web import fastcgi_main + +# XXX: error handling for imports? Lighttp sucks hard at this +from qmsk.irclogs import wsgi + +def main () : + """ + Build our WSGIApplication and run + """ + + # create app + app = wsgi.Application() + + # run once + fastcgi_main.run(app) + +if __name__ == '__main__' : + main() + diff -r 9c7769850195 -r 6db2527b67cf bin/search-index --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bin/search-index Sun Sep 13 01:15:56 2009 +0300 @@ -0,0 +1,640 @@ +#!/usr/bin/env python2.5 + +""" + Tool for accessing the search index +""" + +# XXX: fix path +import sys; sys.path.insert(0, '.'); sys.path.insert(0, '..') + +import os, os.path, fcntl +import datetime, pytz +import optparse + +# configuration and the LogSearchIndex module +from qmsk.irclogs import config, utils, log_search, channels + +def _open_index (options, open_mode) : + """ + Opens the LogSearchIndex + """ + + return log_search.LogSearchIndex(config.LOG_CHANNELS, options.index_path, open_mode) + + +def _open_index_and_channel (options, channel_name, open_mode) : + """ + Opens+returns a LogSearchIndex and a LogChannel + """ + + # open the LogSearchIndex + index = _open_index(options, open_mode) + + # open the channel + channel = config.LOG_CHANNELS.lookup(channel_name) + + # return + return index, channel + +def _iter_insert_stats (index, channel, lines) : + """ + Insert the given lines into the index. + + Assumes the lines will be in time-order, and yields a series of (date, count) tuples for every date that lines + are inserted for + """ + + # last date + date = None + + # count + count = 0 + + # iter lines + for line in lines : + # next day? 
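+        # (illustrative: given lines stamped 2009-09-12 23:59 and then
+        #  2009-09-13 00:00, the check below fires on the second line and
+        #  yields (date(2009, 9, 12), count) before counting restarts)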
+        if not date or line.timestamp.date() != date :
+            if date :
+                # yield stats
+                yield date, count
+
+                # reset count
+                count = 0
+
+            # timestamp's date
+            date = line.timestamp.date()
+
+        # insert
+        index.insert_line(channel, line)
+
+        # count
+        count += 1
+
+    # final count?
+    if date and count :
+        yield date, count
+
+def _insert_lines (index, options, channel, lines) :
+    """
+        Insert the given lines into the index.
+
+        Assumes the lines will be in time-order, and prints out the date and count for the inserted lines as
+        status messages
+    """
+
+    # iterate insert stats
+    for date, count in _iter_insert_stats(index, channel, lines) :
+        # output date header?
+        if not options.quiet :
+            print "%s: %s" % (date.strftime('%Y-%m-%d'), count),
+
+def _load_channel_date (index, options, channel, date) :
+    """
+        Loads the logs for the given date from the channel's LogSource into the given LogSearchIndex
+    """
+
+    if not options.quiet :
+        print "Loading date for channel %s" % channel.id
+
+    try :
+        # load lines for date
+        lines = channel.source.get_date(date)
+
+    except Exception, e :
+        if not options.skip_missing :
+            raise
+
+        if not options.quiet :
+            print "\tSkipped: %s" % (e, )
+
+    else :
+        # insert
+        _insert_lines(index, options, channel, lines)
+
+def _parse_date (options, date_str, tz=None, fmt='%Y-%m-%d') :
+    """
+        Parse the given date string, using the given timezone (defaults to options.timezone) and format
+    """
+
+    # default tz
+    if not tz :
+        tz = options.timezone
+
+    try :
+        # parse
+        return datetime.datetime.strptime(date_str, fmt).replace(tzinfo=tz)
+
+    except Exception, e :
+        raise CommandError("[ERROR] Invalid date: %s: %s" % (date_str, e))
+
+def _output_lines (options, lines) :
+    """
+        Display the formatted LogLines
+    """
+
+    # display as plaintext
+    for line, txt_data in options.formatter.format_txt(lines, full_timestamps=True) :
+        print txt_data
+
+class CommandError (Exception) :
+    """
+        Error with command-line arguments
+    """
+
+    pass
+
+def cmd_create (options) :
+    """
+        Creates a new index
+    """
+
+    # open index
+    index = _open_index(options, 'ctrunc' if options.force else 'c')
+
+    # that's all
+    pass
+
+def cmd_load (options, channel_name, *dates) :
+    """
+        Loads the logs for a specific channel for the given dates (in terms of the channel logs' timezone) into
+        the index
+    """
+
+    # open index/channel
+    index, channel = _open_index_and_channel(options, channel_name, 'c' if options.create else 'a')
+
+    # handle each date
+    for date_str in dates :
+        # parse date
+        try :
+            date = _parse_date(options, date_str, channel.source.tz)
+
+        # handle errors
+        except CommandError, e :
+            if options.skip_missing :
+                print "[ERROR] %s: %s" % (date_str, e)
+
+            else :
+                raise
+
+        # otherwise, load
+        else :
+            _load_channel_date(index, options, channel, date)
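+
+# Example usage (hypothetical paths; 'test' is one of the channels defined in
+# config.LOG_CHANNELS): create a fresh index, then load two days of #test logs
+# into it:
+#
+#   ./bin/search-index --index /tmp/search-index create
+#   ./bin/search-index --index /tmp/search-index load test 2009-09-12 2009-09-13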
+def cmd_load_month (options, channel_name, *months) :
+    """
+        Loads the logs for a specific channel for the given months (in terms of the channel's timezone) into
+        the index
+    """
+
+    # open index/channel
+    index, channel = _open_index_and_channel(options, channel_name, 'c' if options.create else 'a')
+
+    # handle each month
+    for month_str in months :
+        # parse date
+        try :
+            month = _parse_date(options, month_str, channel.source.tz, '%Y-%m')
+
+        # handle errors
+        except CommandError, e :
+            # skip?
+            if options.skip_missing :
+                if not options.quiet :
+                    print "[ERROR] %s: %s" % (month_str, e)
+
+                continue
+
+            else :
+                raise
+
+        # get the set of days
+        days = list(channel.source.get_month_days(month))
+
+        if not options.quiet :
+            print "Loading %d days of logs:" % (len(days))
+
+        # load each day
+        for date in days :
+            # convert to datetime
+            dt = datetime.datetime.combine(date, datetime.time(0)).replace(tzinfo=channel.source.tz)
+
+            # load
+            _load_channel_date(index, options, channel, dt)
+
+def cmd_search (options, channel_name, query) :
+    """
+        Search the index for events on a specific channel with the given query
+    """
+
+    # sanity-check
+    if options.create :
+        raise Exception("--create doesn't make sense for 'search'")
+
+    # open index/channel
+    index, channel = _open_index_and_channel(options, channel_name, 'r')
+
+    # search
+    lines = index.search_simple(channel, query)
+
+    # display
+    _output_lines(options, lines)
+
+def cmd_list (options, channel_name, *dates) :
+    """
+        List the indexed events for a specific date
+    """
+
+    # sanity-check
+    if options.create :
+        raise Exception("--create doesn't make sense for 'list'")
+
+    # open index/channel
+    index, channel = _open_index_and_channel(options, channel_name, 'r')
+
+    # ...for each date
+    for date_str in dates :
+        # parse date
+        date = _parse_date(options, date_str)
+
+        # list
+        lines = index.list(channel, date)
+
+        # display
+        _output_lines(options, lines)
+
+def _autoload_reset (options, channels) :
+    """
+        Reset old autoload state
+    """
+
+    # warn
+    if not options.quiet :
+        print "[WARN] Resetting autoload state for: %s" % ', '.join(channel.id for channel in channels)
+
+    # iter
+    for channel in channels :
+        # statefile path
+        statefile_path = os.path.join(options.autoload_state_path, 'chan-%s' % channel.id)
+
+        # is it present?
+        if not os.path.exists(statefile_path) :
+            if not options.quiet :
+                print "[WARN] No statefile found at %s" % statefile_path
+
+        else :
+            if not options.quiet :
+                print "\t%s: " % channel.id,
+
+            # remove the statefile
+            os.remove(statefile_path)
+
+            if not options.quiet :
+                print "OK"
+
+def cmd_autoload (options, *channel_names) :
+    """
+        Automatically loads all channel logs that have not been indexed yet (by logfile mtime)
+    """
+
+    # open index, nonblocking
+    index = _open_index(options, 'c?' if options.create else 'a?')
+
+    # default to all channels
+    if not channel_names :
+        channels = config.LOG_CHANNELS
+
+    else :
+        channels = [config.LOG_CHANNELS.lookup(channel_name) for channel_name in channel_names]
+
+    # reset autoload state?
+    if options.reset :
+        _autoload_reset(options, channels)
+
+        if not options.quiet :
+            print
+
+    # iterate channels
+    for channel in channels :
+        if not options.quiet :
+            print "Channel %s:" % channel.id
+
+        # no 'from' by default
+        after = None
+
+        # path to our state file
+        statefile_path = os.path.join(options.autoload_state_path, 'chan-%s' % channel.id)
+        statefile_tmppath = statefile_path + '.tmp'
+
+        # does it exist?
+        have_tmpfile = os.path.exists(statefile_tmppath)
+
+        # do we have a tempfile from a previous crash?
+        if have_tmpfile and not options.ignore_resume :
+            # first, open it...
+            statefile_tmp = open(statefile_tmppath, 'r+')
+
+            # ...
then lock it + fcntl.lockf(statefile_tmp, fcntl.LOCK_EX | fcntl.LOCK_NB) + + # read after timestamp + after_str = statefile_tmp.read().rstrip() + + if after_str : + # parse timestamp + after = utils.from_utc_timestamp(int(after_str)) + + if not options.quiet : + print "\tContinuing earlier progress from %s" % after + + else : + # ignore + if not options.quiet : + print "\t[WARN] Ignoring empty temporary statefile" + + else : + # warn about old tmpfile that was ignored + if have_tmpfile and not options.quiet : + print "\t[WARN] Ignoring old tmpfile state" + + # open new tempfile + statefile_tmp = open(statefile_tmppath, 'w') + + # lock + fcntl.lockf(statefile_tmp, fcntl.LOCK_EX | fcntl.LOCK_NB) + + # override? + if options.reload : + # load all + mtime = None + + if not options.quiet : + print "\tForcing reload!" + + # stat for mtime + else : + # stat for mtime, None if unknown + mtime = utils.mtime(statefile_path, ignore_missing=True) + + if mtime and not options.quiet : + print "\tLast load time was %s" % mtime + + elif not options.quiet : + print "\t[WARN] No previous load state! Loading full logs" + + # only after some specific date? + if options.after : + # use unless read from tempfile + if not after : + after = options.after + + if not options.quiet : + print "\tOnly including dates from %s onwards" % after + + else : + if not options.quiet : + print "\t[WARN] Ignoring --from because we found a tempfile" + + # only up to some specific date? + if options.until : + until = options.until + + if not options.quiet : + print "\tOnly including dates up to (and including) %s" % until + else : + # default to now + until = None + + # get lines + lines = channel.source.get_modified(mtime, after, until) + + # insert + if not options.quiet : + print "\tLoading and inserting..." + print + + # iterate insert() per day to display info and update progress + for date, count in _iter_insert_stats(index, channel, lines) : + # output date header? + if not options.quiet : + print "\t%10s: %d" % (date.strftime('%Y-%m-%d'), count) + + # write temp state + statefile_tmp.seek(0) + statefile_tmp.write(str(utils.to_utc_timestamp(datetime.datetime.combine(date, datetime.time(0))))) + statefile_tmp.flush() + + # write autoload state + open(statefile_path, 'w').close() + + # close+delete tempfile + statefile_tmp.close() + os.remove(statefile_tmppath) + + if not options.quiet : + print + + # done + return + +def cmd_help (options, *args) : + """ + Help about commands + """ + + import inspect + + # general help stuff + options._parser.print_help() + + # specific command? 
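+    # (e.g. `search-index help load` reaches this branch with args = ('load',);
+    #  per-command help is still unimplemented below)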
+ if args : + # the command name + command, = args + + # XXX: display info about specific command + xxx + + # general + else : + print + print "Available commands:" + + # build list of all cmd_* objects + cmd_objects = [(name, obj) for name, obj in globals().iteritems() if name.startswith('cmd_') and inspect.isfunction(obj)] + + # sort alphabetically + cmd_objects.sort() + + # iterate through all cmd_* objects + for cmd_func_name, cmd_func in cmd_objects : + # remove cmd_ prefix + cmd_name = cmd_func_name[4:] + + # inspect + cmd_args, cmd_varargs, cmd_varkw, cmd_default = inspect.getargspec(cmd_func) + cmd_doc = inspect.getdoc(cmd_func) + + # remove the "options" arg + cmd_args = cmd_args[1:] + + # display + print "\t%10s %-30s : %s" % (cmd_name, inspect.formatargspec(cmd_args, cmd_varargs, None, cmd_default), cmd_doc) + +class MyOption (optparse.Option) : + """ + Our custom types for optparse + """ + + def check_date (option, opt, value) : + """ + Parse a date + """ + + try : + # parse + return datetime.datetime.strptime(value, '%Y-%m-%d') + + # trap -> OptionValueError + except Exception, e : + raise optparse.OptionValueError("option %s: invalid date value: %r" % (opt, value)) + + def check_timezone (option, opt, value) : + """ + Parse a timezone + """ + + try : + # parse + return pytz.timezone(value) + + # trap -> OptionValueError + except Exception, e : + raise optparse.OptionValueError("option %s: invalid timezone: %r" % (opt, value)) + + def take_action (self, action, dest, opt, value, values, parser) : + """ + Override take_action to handle date + """ + + if action == "parse_date" : + # get timezone + tz = values.timezone + + # set timezone + value = value.replace(tzinfo=tz) + + # store + return optparse.Option.take_action(self, 'store', dest, opt, value, values, parser) + + else : + # default + return optparse.Option.take_action(self, action, dest, opt, value, values, parser) + + TYPES = optparse.Option.TYPES + ('date', 'timezone') + TYPE_CHECKER = optparse.Option.TYPE_CHECKER.copy() + TYPE_CHECKER['date'] = check_date + TYPE_CHECKER['timezone'] = check_timezone + ACTIONS = optparse.Option.ACTIONS + ('parse_date', ) + STORE_ACTIONS = optparse.Option.STORE_ACTIONS + ('parse_date', ) + TYPED_ACTIONS = optparse.Option.TYPED_ACTIONS + ('parse_date', ) + ACTIONS = optparse.Option.ACTIONS + ('parse_date', ) + +def main (argv) : + """ + Command-line main, with given argv + """ + + # define parser + parser = optparse.OptionParser( + usage = "%prog [options] [ ... 
]", + add_help_option = False, + option_class = MyOption, + ) + + # general options # # # # + general = optparse.OptionGroup(parser, "General Options") + general.add_option('-h', "--help", dest="help", help="Show this help message and exit", + action="store_true" ) + + general.add_option( "--formatter", dest="formatter_name", help="LogFormatter to use", + metavar="FMT", type="choice", default=config.PREF_FORMATTER_DEFAULT.name, + choices=[fmt_name for fmt_name in config.LOG_FORMATTERS.iterkeys()] ) + + general.add_option( "--index", dest="index_path", help="Index database path", + metavar="PATH", default=config.SEARCH_INDEX_PATH ) + + general.add_option( "--timezone", dest="timezone", help="Timezone for output", + metavar="TZ", type="timezone", default=pytz.utc ) + + general.add_option( "--force", dest="force", help="Force dangerous operation", + action="store_true" ) + + general.add_option( "--quiet", dest="quiet", help="Supress status messages", + action="store_true" ) + parser.add_option_group(general) + + + # cmd_load options # # # # + load = optparse.OptionGroup(parser, "Load Options") + load.add_option( "--skip-missing", dest="skip_missing", help="Skip missing logfiles", + action="store_true" ) + + load.add_option( "--create", dest="create", help="Create index database", + action="store_true" ) + parser.add_option_group(load) + + + # cmd_autoload options # # # # + autoload = optparse.OptionGroup(parser, "Autoload Options") + autoload.add_option( "--autoload-state", dest="autoload_state_path", help="Path to autoload state dir", + metavar="PATH", default=config.SEARCH_AUTOINDEX_PATH) + + autoload.add_option( "--from", dest="after", help="Only autoload logfiles from the given date on", + metavar="DATE", type="date", action="parse_date", default=None ) + + autoload.add_option( "--until", dest="until", help="Only autoload logfiles up to (and including) the given date", + metavar="DATE", type="date", action="parse_date", default=None ) + + autoload.add_option( "--reload", dest="reload", help="Force reload lines", + action="store_true" ) + + autoload.add_option( "--reset", dest="reset", help="Reset old autload state", + action="store_true" ) + + autoload.add_option( "--ignore-resume", dest="ignore_resume", help="Do not try and resume interrupted autoload", + action="store_true" ) + parser.add_option_group(autoload) + + # parse + options, args = parser.parse_args(argv[1:]) + + # postprocess stuff + options._parser = parser + options.formatter = config.LOG_FORMATTERS[options.formatter_name](options.timezone, "%H:%M:%S", None, None) + + # special-case --help + if options.help : + return cmd_help(options, *args) + + # must have at least the command argument + if not args : + raise CommandError("Missing command") + + # pop command + command = args.pop(0) + + # get func + func = globals().get('cmd_%s' % command) + + # unknown command? 
+ if not func : + raise CommandError("Unknown command: %s" % command) + + # call + func(options, *args) + +if __name__ == '__main__' : + try : + main(sys.argv) + sys.exit(0) + + except CommandError, e : + print e + sys.exit(1) + diff -r 9c7769850195 -r 6db2527b67cf channels.py --- a/channels.py Sun Sep 13 00:49:55 2009 +0300 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,38 +0,0 @@ -""" - Our list of LogChannels -""" - -class ChannelList (object) : - """ - The list of channels, and related methods - """ - - - def __init__ (self, channel_list) : - """ - Initialize with the given channel dict - """ - - self.channel_list = channel_list - self.channel_dict = dict((channel.id, channel) for channel in channel_list) - - def lookup (self, channel_name) : - """ - Looks up the LogChannel for the given name - """ - - return self.channel_dict[channel_name] - - def dict (self) : - """ - Returns a { name: LogChannel } dict - """ - return self.channel_dict - - def __iter__ (self) : - """ - Iterate over our defined LogChannel objects - """ - - return iter(self.channel_list) - diff -r 9c7769850195 -r 6db2527b67cf config.py --- a/config.py Sun Sep 13 00:49:55 2009 +0300 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,112 +0,0 @@ -""" - Configureable defaults -""" - -import os.path, pytz -from log_parser import IrssiParser -from log_channel import LogChannel -from log_source import LogSourceDecoder, LogDirectory -from log_formatter import IrssiFormatter, DebugFormatter -from channels import ChannelList -import log_formatter - -# build relative paths to the dir containing this file -relpath = lambda path : os.path.join(os.path.dirname(__file__), path) - -### ### -### Configuration ### -### ### - -# timezone to use for logs -LOG_TIMEZONE = pytz.timezone('Europe/Helsinki') - -# timestamp format for logfiles -LOG_TIMESTAMP_FMT = '%H:%M:%S' - -# the decoder used for logfiles -LOG_DECODER = LogSourceDecoder(( - ('utf-8', 'strict'), - ('latin-1', 'replace'), -)) - -# log filename format -LOG_FILENAME_FMT = '%Y-%m-%d' - -# the log parser that we use -LOG_PARSER = IrssiParser(LOG_TIMEZONE, LOG_TIMESTAMP_FMT) -#LOG_PARSER_FULLTS = IrssiParser(LOG_TIMEZONE, '%Y%m%d%H%M%S') - -# the statically defined channel list -LOG_CHANNELS = ChannelList([ - LogChannel('tycoon', "OFTC", "#tycoon", - LogDirectory(relpath('/home/spbot/irclogs/tycoon'), LOG_TIMEZONE, LOG_PARSER, LOG_DECODER, LOG_FILENAME_FMT) - ), - - LogChannel('openttd', "OFTC", "#openttd", - LogDirectory(relpath('/home/spbot/irclogs/openttd'), LOG_TIMEZONE, LOG_PARSER, LOG_DECODER, LOG_FILENAME_FMT) - ), - - LogChannel('test', "TEST", "#test", - LogDirectory(relpath('/home/spbot/irclogs/test'), LOG_TIMEZONE, LOG_PARSER, LOG_DECODER, LOG_FILENAME_FMT) - ) -]) - -# URL to the hgweb installation for this code -HGWEB_URL = "http://hg.qmsk.net/irclogs2" - -# path to the mercurial working copy containing this code -HG_WC_PATH = relpath(".") - -# how to handle decode() errors for logfile lines -LOG_SOURCE_DECODE_ERRORS = 'replace' - -# date format for URLs -URL_DATE_FMT = '%Y-%m-%d' - -# month name format -MONTH_FMT = '%B %Y' - -# timezone name format -TIMEZONE_FMT = '%Z %z' - -# TTF fonts to use for drawing images -FORMATTER_IMAGE_FONTS = { - # XXX: no unicode support - # 'default': (None, "Ugly default font" ), - 'ttf-dejavu-mono': ("/usr/share/fonts/truetype/ttf-dejavu/DejaVuSansMono.ttf", "DejaVu Sans Mono" ), - 'ttf-liberation-mono': ("/usr/share/fonts/truetype/ttf-liberation/LiberationMono-Regular.ttf", "Liberation Mono Regular" ) -} - -# available formatters 
-LOG_FORMATTERS = {
-    'irssi':    IrssiFormatter,
-    'debug':    DebugFormatter,
-}
-
-# Cookie settings
-PREF_COOKIE_PATH = '/'
-PREF_COOKIE_EXPIRE_SECONDS = 1 * 365 * 24 * 60 * 60    # one year
-
-# default preferences
-PREF_TIME_FMT_DEFAULT = '%H:%M:%S'
-PREF_DATE_FMT_DEFAULT = '%Y-%m-%d'
-PREF_TIMEZONE_FALLBACK = pytz.utc
-PREF_FORMATTER_DEFAULT = IrssiFormatter
-PREF_COUNT_DEFAULT = 200
-PREF_COUNT_MAX = None
-PREF_IMAGE_FONT_DEFAULT = 'ttf-dejavu-mono'
-PREF_IMAGE_FONT_SIZE_DEFAULT = 12
-PREF_IMAGE_FONT_SIZE_MAX = 32
-
-# search line count options
-SEARCH_LINE_COUNT_OPTIONS = (
-    (50,    50),
-    (100,   100),
-    (200,   200),
-    (None,  "∞"),
-)
-
-# search index database path
-SEARCH_INDEX_PATH = '/home/spbot/irclogs/search-index'
-SEARCH_AUTOINDEX_PATH = '/home/spbot/irclogs/search-autoindex'
-
diff -r 9c7769850195 -r 6db2527b67cf error.py
--- a/error.py Sun Sep 13 00:49:55 2009 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,183 +0,0 @@
-"""
-    Build error messages
-"""
-
-import traceback, sys, cgi, urllib
-
-def truncate (msg, limit) :
-    """
-        Truncate the given message to <limit> chars
-    """
-
-    if len(msg) > limit :
-        return msg[:limit-3] + '...'
-
-    else :
-        return msg
-
-def build_link (title, url) :
-    return '<a href="%s">%s</a>' % (cgi.escape(url, True), cgi.escape(title))
-
-def build_error (exc_info=None, env=None) :
-    """
-        Builds an error page for the given/current exception.
-
-        If env is given, it should be an environment dict, like under WSGI, and will be used to display additional
-        info about the request.
-
-        Returns a (status, content-type, body) tuple, with all components being non-unicode strs.
-    """
-
-    # default for exc_info is current exception
-    if not exc_info :
-        exc_info = sys.exc_info()
-
-    # request URL?
-    if env :
-        try :
-            from qmsk.web.http import request_url
-
-            url = request_url(env)
-
-        except :
-            # ignore
-            url = None
-
-    else :
-        url = None
-
-    # working copy path?
-    try :
-        from config import HG_WC_PATH, HGWEB_URL
-
-        wc_path = HG_WC_PATH
-        hgweb_url = HGWEB_URL
-
-    except :
-        # a good guess
-        wc_path = '.'
-        hgweb_url = None
-
-    # version?
- try : - from version import version_string, version_link_hg - - version = version_string(wc_path) - - if hgweb_url : - version_href = version_link_hg(hgweb_url, wc_path) - - else : - version_href = None - - except : - version = None - version_href = None - - # the exception type - exception_str = traceback.format_exception_only(*exc_info[:2])[-1] - - # the exception traceback - traceback_lines = traceback.format_exception(*exc_info) - - # XXX: make this configureable - trac_url = "http://projects.qmsk.net/irclogs2/trac" - - # ticket list - trac_query = build_link("All tickets", "%s/query" % trac_url) - - # submit ticket - submit_args = dict(type='defect') - - # handle optional components - if url : - submit_args['url'] = url - trac_query_url = build_link("Same URL", "%s/query?url=%s" % (trac_url, urllib.quote(url))) - else : - trac_query_url = "" - - if version : - submit_args['revision'] = version - trac_query_version = build_link("Same version", "%s/query?revision=%s" % (trac_url, urllib.quote(version))) - - else : - trac_query_version = "" - - if exception_str : - submit_args['summary'] = truncate(exception_str, 140) - trac_query_err = build_link("Same error", "%s/query?summary=%s" % (trac_url, urllib.quote(exception_str.rstrip()))) - - else : - trac_query_err = "" - - if traceback_lines : - # this is big - submit_args['description'] = """\ -[Insert any additional information here] - - -= Traceback = -{{{ -%s -}}}""" % ''.join(traceback_lines) - - # the trac newticket URL - submit_url = "%s/newticket?%s" % (trac_url, '&'.join('%s=%s' % (urllib.quote(k), urllib.quote(v)) for k, v in submit_args.iteritems())) - - # return - return ('500 Internal Server Error', 'text/html; charset=UTF-8', ("""\ -500 Internal Server Error -

-<h1>Oops!</h1>
-
-<p>
-    An error occurred, which was not logged, and was not reported to anybody. It might be your fault, or it might be mine.
-</p>
-
-<p>
-    You can try:
-</p>
-
-<ol>
-    <li>Poking the administrator of this site to see if they respond</li>
-
-    <li>Looking for similar issue tickets with:
-        <ul>
-            <li>%(trac_query)s</li>
-            <li>%(trac_query_url)s</li>
-            <li>%(trac_query_version)s</li>
-            <li>%(trac_query_err)s</li>
-        </ul>
-    </li>
-
-    <li>Submitting a new ticket using the following link (quick &amp; easy):
-        <blockquote>
-            <a href="%(submit_url)s">%(submit_url_short)s</a>
-        </blockquote>
-    </li>
-</ol>
-
-<h2>Details:</h2>
-
-<p>The page you tried to request was:</p>
-<blockquote>
-    %(url)s
-</blockquote>
-
-<p>The software version is:</p>
-<blockquote>
-    %(version_link)s
-</blockquote>
-
-<p>The error was:</p>
-<blockquote>
-    %(exception)s
-</blockquote>
-
-<p>The traceback was:</p>
-<pre>
-%(traceback)s
-</pre>
-""" % dict( - url = url if url else 'Unknown', - version_link = version_href if version_href else 'Unknown', - exception = truncate(exception_str, 512), - traceback = cgi.escape(''.join(' ' + line for line in traceback_lines)), - trac_query = trac_query, - trac_query_url = trac_query_url, - trac_query_version = trac_query_version, - trac_query_err = trac_query_err, - submit_url = submit_url, - submit_url_short = truncate(submit_url, 120) - )).encode('utf-8')) - diff -r 9c7769850195 -r 6db2527b67cf handlers.py --- a/handlers.py Sun Sep 13 00:49:55 2009 +0300 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,292 +0,0 @@ -""" - Our URL action handlers -""" - -import datetime, calendar, pytz - -from qmsk.web import http, template - -import urls, channels, helpers -import preferences as prefs -from preferences import preferences -import config, log_search - -# load templates from here -templates = template.TemplateLoader("templates", - _helper_class = helpers.Helpers, - urls = urls, - channel_list = config.LOG_CHANNELS, - config = config, -) - -# return a http.Response for the given text in the given format -def _render_type (request, channel, lines, type, full_timestamps=False) : - """ - Render the given LogLines as a http.Response in the given format, which is one of: - html - XXX: not supported - txt - Plaintext - png - PNG image - rss - RSS feed - """ - - # load related preferences - formatter = request.prefs['formatter'] - - kwargs = dict( - full_timestamps = full_timestamps - ) - - # we can render in various modes... - if type in ('html', None) : - xxx - - elif type == 'txt' : - # plaintext - lines = formatter.format_txt(lines, **kwargs) - - # build data - data = '\n'.join(data for line, data in lines) - - return http.Response(data, 'text/plain') - - elif type == 'png' : - # PNG image - png_data = formatter.format_png(lines, **kwargs) - - return http.Response(png_data, 'image/png', charset=None) - - elif type == 'rss' : - # RSS feed - rss_data = formatter.format_rss(lines, **kwargs) - - # XXX: fix to render as unicode? - return http.Response(rss_data, 'application/rss+xml', charset=None) - - else : - raise http.ResponseError("Unrecognized type: %r" % (type, )) - -def _render_date (request, channel, date, lines, type, count, page, max) : - """ - Render the given LogLines as a http.Response for channel_date - """ - - # type? - if type : - # special type - return _render_type(request, channel, lines, type) - - else : - # format HTML - lines = request.prefs['formatter'].format_html(lines) - - # render - return templates.render_to_response("channel_date", - req = request, - prefs = request.prefs, - channel = channel, - date = date, - count = count, - page = page, - max = max, - lines = lines, - - # for prev/next date - date_next = channel.source.get_next_date(date), - date_prev = channel.source.get_prev_date(date), - ) - -@preferences.handler() -def index (request) : - """ - The topmost index page, display a list of available channels, perhaps some general stats - """ - - return templates.render_to_response("index", - req = request, - prefs = request.prefs, - ) - -# XXX: fix this namespace crap -@preferences.handler() -def preferences_ (request) : - """ - Preferences editor - """ - - # POST? 
- if request.is_post() : - # update any modified preferences - for pref in preferences.pref_list : - # get the POST'd value, default = None - post_value = request.get_post(pref.name, None) - - # skip non-specified values - # XXX: this is to not clobber timezone_offset to None - if post_value is None : - continue - - # parse the POST'd value, None -> default - new_value = request.prefs.parse(pref, post_value) - - # update if given and changed - if new_value != request.prefs[pref] : - request.prefs.set(pref.name, new_value) - - # render - return templates.render_to_response("preferences", - req = request, - prefs = request.prefs, - preferences = prefs, - ) - -def channel_select (request, channel) : - """ - Redirect to the appropriate channel_view - """ - - return http.Redirect(urls.channel.build(request, channel=channel)) - -@preferences.handler(prefs.formatter) -def channel_last (request, channel, count, formatter, type=None) : - """ - The main channel view page, displaying the most recent lines - """ - - # get latest events - lines = channel.source.get_latest(count) - - # type? - if type : - # other format - return _render_type(request, channel, lines, type) - - else : - # format HTML - lines = formatter.format_html(lines) - - # render page - return templates.render_to_response("channel_last", - req = request, - prefs = request.prefs, - channel = channel, - count = count, - lines = lines, - ) - -@preferences.handler(prefs.formatter, prefs.timezone, prefs.count) -def channel_link (request, channel, timestamp, formatter, timezone, count, type=None) : - """ - Display channel_date for specific UTC timestamp - """ - - # convert timestamp to user's timezone - timestamp = timestamp.astimezone(timezone) - - # get correct day's correct page of lines - page, max, lines = channel.source.get_date_paged(timestamp, count) - - # render channel_date - return _render_date (request, channel, timestamp, lines, type, count, page, max) - -@preferences.handler(prefs.timezone) -def channel_calendar (request, channel, year, month, timezone) : - """ - Display a list of avilable logs for some month - """ - - # current date as default - now = timezone.localize(datetime.datetime.now()) - - # target year/month - target = timezone.localize(datetime.datetime( - year = year if year else now.year, - month = month if month else now.month, - day = 1 - )) - - # display calendar - return templates.render_to_response("channel_calendar", - req = request, - prefs = request.prefs, - channel = channel, - month = target, - ) - -@preferences.handler(prefs.count, prefs.timezone) -def channel_date (request, channel, date, count, timezone, page=1, type=None) : - """ - Display all log data for the given date - """ - - # convert date to user's timezone - date = timezone.localize(date) - -# print -# print "channel_date: date=%s" % date - - # get that day's events, either paged or not - if page : - page, max, lines = channel.source.get_date_paged(date, count, page) - - else : - lines = channel.source.get_date(date) - max = None - - # render channel_date - return _render_date (request, channel, date, lines, type, count, page, max) - -@preferences.handler(prefs.formatter, prefs.count) -def channel_search (request, channel, formatter, count, q=None, page=1, max=1, type=None, t=None) : - """ - Display the search form for the channel for GET, or do the search for POST. - """ - - # calculate skip offset from page/count - skip = (page - 1) * count - - # got a search query? 
- if q : - # attribute targets - targets = dict(('search_%s' % target, True) for target in t if target in ('msg', 'nick')) if t else {} - - try : - # do search - lines = log_search.get_index().search_simple(channel, q, count, skip, **targets) - - # update max? - if max and page > max : - max = page - - except log_search.NoResultsFound : - # no results - lines = None - - else : - # just display the search form - lines = None - - # type? - if type and lines : - # special type - return _render_type(request, channel, lines, type, full_timestamps=True) - - else : - # format lines to HTML if any - if lines : - # format - lines = formatter.format_html(lines, full_timestamps=True) - - # render page - return templates.render_to_response("channel_search", - req = request, - prefs = request.prefs, - channel = channel, - search_query = q, - search_targets = t, - count = count, - page = page, - skip = skip, - max = max, - lines = lines, - ) - diff -r 9c7769850195 -r 6db2527b67cf helpers.py --- a/helpers.py Sun Sep 13 00:49:55 2009 +0300 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,243 +0,0 @@ -""" - Some additional helpers -""" - -import datetime -import calendar as _calendar - -import qmsk.web.helpers - -import preferences, urls, config, version - -class Helpers (qmsk.web.helpers.Helpers) : - """ - Our set of helpers, inheriting from base helpers - """ - - # set contructor... - set = set - - # reference to calendar instance - calendar = _calendar.Calendar() - - # list of (month_num, month_name) for the months in the year - months = list(enumerate(_calendar.month_name))[1:] - - def version_link (self) : - """ - Returns a representing this version of the software - """ - - return version.version_link_hg(config.HGWEB_URL, config.HG_WC_PATH) - - def tz_name (self, tz) : - """ - Returns a string describing the given timezone - """ - - return self.now().strftime(config.TIMEZONE_FMT) - - def fmt_month (self, date) : - """ - Formats a month - """ - - return date.strftime(config.MONTH_FMT) - - def fmt_weekday (self, wday) : - """ - Formats an abbreviated weekday name - """ - - return _calendar.day_abbr[wday] - - def build_date (self, month, mday) : - """ - Returns a datetime.datetime for the given (month.year, month.month, mday) - """ - - return self.ctx['prefs'][preferences.timezone].localize(datetime.datetime(month.year, month.month, mday)) - - def now (self) : - """ - Build current time - """ - - return self.ctx['prefs'][preferences.timezone].localize(datetime.datetime.now()) - - def today (self) : - """ - Build today's date - """ - - return self.now().date() - - def is_today (self, dt) : - """ - Checks if the given datetime.datetime is today - """ - - # compare with current date - return dt.date() == self.today() - - def is_this_month (self, month) : - """ - Checks the given month is the current month - """ - - today = self.today() - - return (month.year == today.year and month.month == today.month) - - @staticmethod - def _wrap_year (year, month) : - """ - Wraps month to between [1, 12], spilling overflow/underflow by to year. - - Returns (year, month) - """ - - # underflow? - if month == 0 : - # wrap to previous year - return (year - 1, 12) - - # overflow? 
-        elif month == 13 :
-            # wrap to next year
-            return (year + 1, 1)
-
-        # sane value
-        elif 1 <= month <= 12 :
-            return (year, month)
-
-        # insane value
-        else :
-            assert False, "invalid year/month: %d/%d" % (year, month)
-
-    def prev_month (self, month) :
-        """
-            Returns the month preceding the given one (as a datetime.datetime)
-        """
-
-        # previous month
-        y, m = self._wrap_year(month.year, month.month - 1)
-
-        # build datetime
-        return datetime.datetime(year=y, month=m, day=1, tzinfo=month.tzinfo)
-
-    def next_month (self, month) :
-        """
-            Returns the month following the given one (as a datetime.datetime)
-        """
-
-        # next month
-        y, m = self._wrap_year(month.year, month.month + 1)
-
-        # build datetime
-        return datetime.datetime(year=y, month=m, day=1, tzinfo=month.tzinfo)
-
-    def fmt_time (self, time=None) :
-        """
-            Format given time, or current time
-        """
-
-        # defaults
-        if not time :
-            time = self.now()
-
-        return time.strftime(self.ctx['prefs'][preferences.time_format])
-
-    def fmt_date (self, date=None) :
-        """
-            Format given date, or current date
-        """
-
-        # defaults
-        if not date :
-            date = self.now()
-
-        return date.strftime(self.ctx['prefs'][preferences.date_format])
-
-    def url (self, url, **params) :
-        """
-            Build URL with our request object
-        """
-
-        return url.build(self.ctx['req'], **params)
-
-    # old name
-    build_url = url
-
-    def utc_timestamp (self, dtz) :
-        """
-            Build a UTC timestamp from the given datetime
-        """
-
-        return urls.types['ts'].build(dtz)
-
-    def skip_next (self, count, skip) :
-        """
-            Return skip offset for next page
-        """
-
-        return count + skip
-
-    def skip_page (self, count, page) :
-        """
-            Skip to page
-        """
-
-        if page :
-            return count * page
-
-        else :
-            return None
-
-    def skip_prev (self, count, skip) :
-        """
-            Return skip offset for previous page, None for first page
-        """
-
-        if skip > count :
-            return skip - count
-
-        else :
-            return None
-
-    def max (self, *values) :
-        """
-            Returns the largest of the given values
-        """
-
-        return max(values)
-
-    def select_options (self, key_values, selected_key=None) :
-        """
-            Render a series of <option> tags for the given (key, value) pairs
-        """
-
-        return '\n'.join(
-            '<option%s%s>%s</option>' % (
-                ' value="%s"' % key if key is not None else '',
-                ' selected="selected"' if (key if key is not None else value) == selected_key else '',
-                value
-            ) for key, value in key_values
-        )
-
-    def prev_date (self, date) :
-        """
-            Returns the previous date for the given datetime-date
-        """
-
-        return datetime.datetime(date.year, date.month, date.day, tzinfo=date.tzinfo) - datetime.timedelta(days=1)
-
-    def next_date (self, date) :
-        """
-            Returns the next date for the given datetime-date
-        """
-
-        return datetime.datetime(date.year, date.month, date.day, tzinfo=date.tzinfo) + datetime.timedelta(days=1)
-
diff -r 9c7769850195 -r 6db2527b67cf qmsk/irclogs/log_channel.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/qmsk/irclogs/log_channel.py Sun Sep 13 01:15:56 2009 +0300
@@ -0,0 +1,54 @@
+"""
+    A channel represents a series of log events, stored in some log source
+"""
+
+import log_search
+
+class LogChannel (object) :
+    """
+        A single IRC channel, logged to some specific place
+    """
+
+    def __init__ (self, id, network, name, source) :
+        """
+            Initialize this channel from the given identifier key, network name, channel name, and LogSource
+        """
+
+        # store
+        self.id = id
+        self.network = network
+        self.name = name
+        self.source = source
+
+        # bind source
+        self.source.bind_channel(self)
+
+    @property
+    def title (self) :
+        """
+            Title is 'Network - #channel'
+        """
+
+        return "%s - %s" % (self.network,
self.name) + + def search (self, query) : + """ + Perform a search on this channel, returning a sequence of LogLines + """ + + return log_search.index.search_simple(self, query) + + def __str__ (self) : + """ + Returns self.title + """ + + return self.title + + def __repr__ (self) : + """ + Uses self.id + """ + + return "LogChannel(%s)" % (self.id, ) + diff -r 9c7769850195 -r 6db2527b67cf qmsk/irclogs/log_formatter.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/qmsk/irclogs/log_formatter.py Sun Sep 13 01:15:56 2009 +0300 @@ -0,0 +1,257 @@ +""" + Format LogLines into some other representation +""" + +import re, xml.sax.saxutils + +from log_line import LogTypes +from log_formatter_pil import PILImageFormatter +from log_formatter_rss import RSSFormatter + +class LogFormatter (object) : + """ + Provides a method to format series of LogLines into various output formats, with varying themes. + """ + + # machine-readable name + name = None + + # human-readable name + title = None + + ## parameters + # use a fixed-width font for HTML output + html_fixedwidth = True + + def __init__ (self, tz, timestamp_fmt, img_ttf_path, img_font_size) : + """ + Initialize to format timestamps with the given timezone and timestamp. + + Use the given TTF font to render image text with the given size, if given, otherwise, a default one. + """ + + # store + self.tz = tz + self.timestamp_fmt = timestamp_fmt + self.img_ttf_path = img_ttf_path + self.img_font_size = img_font_size + + # XXX: harcoded + self.date_fmt = '%Y-%m-%d' + + def _format_line_text (self, line, template_dict, type=None, full_timestamp=False, **extra) : + """ + Format the given line as text, using the given { type: string template } dict. + + If type is given, then it overrides line.type + + Any additional keyword args will also be available for the template to use + """ + + # default type? + if type is None : + type = line.type + + # look up the template + if type in template_dict : + template = template_dict[type] + + else : + raise Exception("Format template not defined for type: %s" % LogTypes.name_from_code(type)) + + # convert timestamp into display timezone + dtz = line.timestamp.astimezone(self.tz) + + # full timestamps? + if full_timestamp : + # XXX: let the user define a 'datetime' format instead? + timestamp_fmt = self.date_fmt + ' ' + self.timestamp_fmt + + else : + timestamp_fmt = self.timestamp_fmt + + # breakdown source + source_nickname, source_username, source_hostname, source_chanflag = line.source + target_nickname = line.target + + # format with dict + return template % dict( + channel_name = line.channel.name, + datetime = dtz.strftime('%a %b %d %H:%M:%S %Y'), + date = dtz.strftime(self.date_fmt), + timestamp = dtz.strftime(timestamp_fmt), + source_nickname = source_nickname, + source_username = source_username, + source_hostname = source_hostname, + source_chanflag = source_chanflag, + target_nickname = target_nickname, + message = line.data, + **extra + ) + + def format_txt (self, lines, full_timestamps=False) : + """ + Format given lines as plaintext. + + If full_timestamps is given, the output will contain full timestamps with both date and time. + + No trailing newlines. + """ + + abstract + + def format_html (self, lines, full_timestamps=False) : + """ + Format as HTML. 
+ + See format_txt for information about arguments + """ + + abstract + + def format_png (self, lines, full_timestamps=False) : + """ + Format as a PNG image, returning the binary PNG data + """ + + abstract + + def format_rss (self, lines, full_timestamps=False) : + """ + Format as an XML RSS document + """ + + abstract + +class BaseHTMLFormatter (LogFormatter) : + """ + Implements some HTML-formatting utils + """ + + # parameters + html_fixedwidth = True + + # regexp to match URLs + URL_REGEXP = re.compile(r"http://\S+") + + def _process_links (self, line) : + """ + Processed the rendered line, adding in 's for things that look like URLs, returning the new line. + + The line should already be escaped + """ + + def _encode_url (match) : + # encode URL + url_html = match.group(0) + url_link = xml.sax.saxutils.unescape(url_html) + + return '%(url_html)s' % dict(url_link=url_link, url_html=url_html) + + return self.URL_REGEXP.sub(_encode_url, line) + + def format_html (self, lines, **kwargs) : + """ + Just uses format_txt, but processes links, etc + """ + + # format using IrssiTextFormatter + for line, txt in self.format_txt(lines, **kwargs) : + # escape HTML + html = xml.sax.saxutils.escape(txt) + + # process links + html = self._process_links(html) + + # yield + yield line, html + + +class IrssiTextFormatter (RSSFormatter, PILImageFormatter, LogFormatter) : + """ + Implements format_txt for irssi-style output + """ + + # format definitions by type + __FMT = { + LogTypes.RAW : "%(timestamp)s %(data)s", + LogTypes.LOG_OPEN : "--- Log opened %(datetime)s", + LogTypes.LOG_CLOSE : "--- Log closed %(datetime)s", + 'DAY_CHANGED' : "--- Day changed %(date)s", + + LogTypes.MSG : "%(timestamp)s <%(source_chanflag)s%(source_nickname)s> %(message)s", + LogTypes.NOTICE : "%(timestamp)s -%(source_nickname)s- %(message)s", + LogTypes.ACTION : "%(timestamp)s * %(source_nickname)s %(message)s", + + LogTypes.JOIN : "%(timestamp)s -!- %(source_nickname)s [%(source_username)s@%(source_hostname)s] has joined %(channel_name)s", + LogTypes.PART : "%(timestamp)s -!- %(source_nickname)s [%(source_username)s@%(source_hostname)s] has left %(channel_name)s [%(message)s]", + LogTypes.KICK : "%(timestamp)s -!- %(target_nickname)s was kicked from %(channel_name)s by %(source_nickname)s [%(message)s]", + LogTypes.MODE : "%(timestamp)s -!- mode/%(channel_name)s [%(message)s] by %(source_nickname)s", + + LogTypes.NICK : "%(timestamp)s -!- %(source_nickname)s is now known as %(target_nickname)s", + LogTypes.QUIT : "%(timestamp)s -!- %(source_nickname)s [%(source_username)s@%(source_hostname)s] has quit [%(message)s]", + + LogTypes.TOPIC : "%(timestamp)s -!- %(source_nickname)s changed the topic of %(channel_name)s to: %(message)s", + 'TOPIC_UNSET' : "%(timestamp)s -!- Topic unset by %(source_nickname)s on %(channel_name)s", + + LogTypes.SELF_NOTICE: "%(timestamp)s -%(source_nickname)s- %(message)s", + LogTypes.SELF_NICK : "%(timestamp)s -!- %(source_nickname)s is now known as %(target_nickname)s", + + LogTypes.NETSPLIT_START : + "%(timestamp)s -!- Netsplit %(source_hostname)s <-> %(target_nickname)s quits: %(_netsplit_targets)s", + LogTypes.NETSPLIT_END : + "%(timestamp)s -!- Netsplit over, joins: %(_netsplit_targets)s", + } + + def format_txt (self, lines, full_timestamps=False) : + # ...handle each line + for line in lines : + # extra args + extra = {} + + # default to line.type + type = line.type + + # special formatting for unset-Topic + if line.type == LogTypes.TOPIC and line.data is None : + type = 'TOPIC_UNSET' + + # 
format netsplit stuff + elif line.type & LogTypes._NETSPLIT_MASK : + # format the netsplit-targets stuff + extra['_netsplit_targets'] = line.data + + # using __TYPES + yield line, self._format_line_text(line, self.__FMT, type, full_timestamps, **extra) + +class IrssiFormatter (BaseHTMLFormatter, IrssiTextFormatter) : + """ + Implements plain black-and-white irssi-style formatting + """ + + # name + name = 'irssi' + title = "Irssi (plain)" + +class DebugFormatter (BaseHTMLFormatter) : + """ + Implements a raw debug-style formatting of LogLines + """ + + # name + name = 'debug' + title = "Raw debugging format" + + def format_txt (self, lines, full_timestamps=False) : + # iterate + for line in lines : + # just dump + yield line, unicode(line) + +def by_name (name) : + """ + Lookup and return a class LogFormatter by name + """ + + return FORMATTERS[name] + diff -r 9c7769850195 -r 6db2527b67cf qmsk/irclogs/log_formatter_pil.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/qmsk/irclogs/log_formatter_pil.py Sun Sep 13 01:15:56 2009 +0300 @@ -0,0 +1,81 @@ +""" + Use of PIL to render the image formatting stuff +""" + +from PIL import Image, ImageDraw, ImageFont + +from cStringIO import StringIO + +class PILImageFormatter (object) : + """ + Mixin for LogFormatter that implements the basic image-rendering operations on top of format_txt + """ + + # the font we load + font = None + + # line spacing in pixels + LINE_SPACING = 1 + + def _load_font (self) : + """ + Use the configured img_ttf_path for a TrueType font, or a default one + """ + + if self.font : + pass + + elif self.img_ttf_path : + # load truetype with configured size + self.font = ImageFont.truetype(self.img_ttf_path, self.img_font_size) + + else : + # default + self.font = ImageFont.load_default() + + return self.font + + def format_png (self, lines, **kwargs) : + """ + Build and return a PNG image of the given lines, using format_txt + """ + + # load font + font = self._load_font() + + # build list of plain-text line data + lines = list(data for line, data in self.format_txt(lines, **kwargs)) + + # lines sizes + line_sizes = [font.getsize(line) for line in lines] + + # figure out how wide/high the image will be + width = max(width for width, height in line_sizes) + height = sum(height + self.LINE_SPACING for width, height in line_sizes) + + # create new B/W image + img = Image.new('L', (width, height), 0xff) + + # drawer + draw = ImageDraw.Draw(img) + + # starting offset + offset_y = 0 + + # draw the lines + for line, (width, height) in zip(lines, line_sizes) : + # draw + draw.text((0, offset_y), line, font=font) + + # next offset + offset_y += height + self.LINE_SPACING + + # output buffer + buf = StringIO() + + # save + img.save(buf, 'png') + + # return data + return buf.getvalue() + diff -r 9c7769850195 -r 6db2527b67cf qmsk/irclogs/log_formatter_rss.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/qmsk/irclogs/log_formatter_rss.py Sun Sep 13 01:15:56 2009 +0300 @@ -0,0 +1,41 @@ +""" + Uses PyRSS2Gen to generate XML RSS documents +""" + +import PyRSS2Gen as RSS2Gen +import datetime, pytz + +class RSSFormatter (object) : + """ + Mixin for LogFormatter that implements the basic RSS-rendering stuff on top of format_html + """ + + def format_rss (self, lines, **kwargs) : + """ + Process using format_html + """ + + # build the RSS2 object and return the XML + return RSS2Gen.RSS2( + title = "IRC RSS feed", + link = "http://irclogs.qmsk.net/", + description = "A stupid RSS feed that nobody sane would ever use", + + # XXX: GMT + 
lastBuildDate = datetime.datetime.utcnow(), + + items = [ + RSS2Gen.RSSItem( + # use the formatted HTML data as the title + title = html_data, + + # timestamp + pubDate = line.timestamp.astimezone(pytz.utc), + + # link + link = "http://xxx/", + + ) for line, html_data in self.format_html(lines, **kwargs) + ] + ).to_xml('utf8') + diff -r 9c7769850195 -r 6db2527b67cf qmsk/irclogs/log_line.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/qmsk/irclogs/log_line.py Sun Sep 13 01:15:56 2009 +0300 @@ -0,0 +1,186 @@ +""" + An IRC logfile consists of a series of lines/events +""" + +class LogTypes : + """ + Definitions of the various LogLines types: + + LogTypes.RAW + LogTypes.LOG_OPEN + LogTypes.LOG_CLOSE + + LogTypes.MSG + LogTypes.NOTICE + LogTypes.ACTION + + LogTypes.JOIN + LogTypes.PART + LogTypes.KICK + LogTypes.MODE + + LogTypes.NICK + LogTypes.QUIT + + LogTypes.TOPIC + + LogTypes.SELF_NOTICE + LogTypes.SELF_NICK + """ + + # list of LogType values by name + LIST = [ + ## special + # unknown type, may or may not have a timestamp, no source, only data + ('RAW', 0x01), + + # log opened + ('LOG_OPEN', 0x02), + + # log closed + ('LOG_CLOSE', 0x03), + + ## messages + # sent message to + ('MSG', 0x10), + + # sent notice with message to + ('NOTICE', 0x11), + + # sent CTCP action with message to + ('ACTION', 0x12), + + ## user-channel stats + # joined + ('JOIN', 0x21), + + # left with message + ('PART', 0x22), + + # kicked from with message + ('KICK', 0x25), + + # changed modes on with modestring + ('MODE', 0x26), + + ## user status + # changed nickname to + ('NICK', 0x31), + + # quit the network with quit-message + ('QUIT', 0x32), + + ## general channel status + # changed the topic of to + # data may be None if the topic was unset + ('TOPIC', 0x41), + + ## our own actions + # we () sent a notice with message to + ('SELF_NOTICE', 0x51), + + # we () changed nickname to + ('SELF_NICK', 0x52), + + ## slightly weirder bits + # netsplit between and , is a space-separated list of s affected + # the last item in the list of nicknames may also be of the form "+", where count is the number of additional, but hidden, nicknames affected + ('NETSPLIT_START', 0x61), + + # netsplit over, is a list of users affected, see NETSPLIT_START + ('NETSPLIT_END', 0x062), + ] + + @classmethod + def name_from_code (cls, code) : + """ + Looks up a LogType name by code + """ + + return dict((type, name) for name, type in cls.LIST)[code] + +# apply as attributes +for name, code in LogTypes.LIST : + setattr(LogTypes, name, code) + +# masks +LogTypes._NETSPLIT_MASK = 0x60 + +class LogLine (object) : + """ + An event on some specific channel + """ + + # the LogChannel + channel = None + + # the offset, only garunteed to be unique for a specific channel and date + offset = None + + # the event type, as defiend in LogTypes + type = None + + # the UTC timestamp of the event + timestamp = None + + # the source, this should be a (nickname, username, hostname, chanflags) tuple + source = None + + # possible target nickname for certain types (kick, nick) + target = None + + # associated data (message, etc) + data = None + + def __init__ (self, channel, offset, type, timestamp, source, target, data) : + """ + Initialize with given values + """ + + self.channel = channel + self.offset = offset + self.type = type + self.timestamp = timestamp + self.source = source + self.target = target + self.data = data + + def format_type (self) : + """ + Formats type as a string code + """ + + return LogTypes.name_from_code(self.type) + + def 
format_source (self) : + """ + Formats source as [][][!][@], omitting those parts that are missing. + + If all parts are None, this returns the empty string + """ + + nick, user, host, flags = self.source + + return "%s%s%s%s" % ( + flags if flags and flags != ' ' else '', + nick if nick else '', + '!' + user if user else '', + '@' + host if host else '' + ) + + def __unicode__ (self) : + return '\t'.join(( + self.channel.name, + str(self.offset), + self.format_type(), + str(self.timestamp), + self.format_source(), + str(self.target), + unicode(self.data) + )) + + def __repr__ (self) : + return "LogLine(%r, %s, %-12s, %s, %-35s, %-10s, %r)" % ( + self.channel, self.offset, self.format_type(), self.timestamp, self.format_source(), self.target, self.data + ) + diff -r 9c7769850195 -r 6db2527b67cf qmsk/irclogs/log_parser.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/qmsk/irclogs/log_parser.py Sun Sep 13 01:15:56 2009 +0300 @@ -0,0 +1,233 @@ +""" + Parse log data into log_events +""" + +import re +import datetime + +from log_line import LogTypes, LogLine + +class LogParseError (Exception) : + """ + Parsing some line failed + """ + + def __init__ (self, line, offset, message) : + super(LogParseError, self).__init__("%r@%s: %s" % (line, offset, message)) + +class LogParser (object) : + """ + Abstract interface + """ + + def __init__ (self, tz, timestamp_fmt="%H:%M:%S") : + """ + Setup the parser to use the given format for line timestamps, which are of the given timezone + """ + + self.tz = tz + self.timestamp_fmt = timestamp_fmt + + def parse_lines (self, channel, lines, date=None, starting_offset=None) : + """ + Parse the given (iterable) lines of unicode text into a LogEvent, no trailing newline. + + Channel is the LogChannel that these lines belong to. + + Offset is the starting offset, and may be None to not use it. + + Giving date lets the parser build full timestamps, otherwise, unless line timestamps have full date + information, event timestamps will have a date component of 1900/1/1. 
+ """ + + abstract + +class IrssiParser (LogParser) : + """ + A parser for irssi logfiles + """ + + # timestamp prefix, with trailing space + _TS = r'(?P[a-zA-Z0-9: ]+[a-zA-Z0-9])\s*' + + # subexpression parts + _NICK = r'(?P.+?)' + _NICK2 = r'(?P.+?)' + _TARGET = r'(?P.+?)' + _CHAN = r'(?P.+?)' + _CHAN2 = r'(?P.+?)' + _USERHOST = r'(?P.*?)@(?P.*?)' + _MSG = r'(?P.*)' + _SRV1 = r'(?P.+?)' + _SRV2 = r'(?P.+?)' + + # regular expressions for matching lines, by type + TYPE_EXPRS = ( + ( LogTypes.LOG_OPEN, r'--- Log opened (?P.+)' ), + ( LogTypes.LOG_CLOSE, r'--- Log closed (?P.+)' ), + ( LogTypes.MSG, _TS + r'<(?P.)' + _NICK + '> ' + _MSG ), + ( LogTypes.NOTICE, _TS + r'-' + _NICK + ':' + _CHAN + '- ' + _MSG ), + ( LogTypes.ACTION, _TS + r'\* ' + _NICK + ' ' + _MSG ), + ( LogTypes.JOIN, _TS + r'-!- ' + _NICK + ' \[' + _USERHOST + '\] has joined ' + _CHAN ), + ( LogTypes.PART, _TS + r'-!- ' + _NICK + ' \[' + _USERHOST + '\] has left ' + _CHAN + ' \[(?P.*?)\]' ), + ( LogTypes.KICK, _TS + r'-!- ' + _TARGET + ' was kicked from ' + _CHAN + ' by ' + _NICK + ' \[(?P.*?)\]' ), + # XXX: use hostname instead of nickname for ServerMode + ( LogTypes.MODE, _TS + r'-!- (mode|ServerMode)/' + _CHAN + ' \[(?P.+?)\] by (?P\S+)' ), + ( LogTypes.NICK, _TS + r'-!- ' + _NICK + ' is now known as (?P\S+)' ), + ( LogTypes.QUIT, _TS + r'-!- ' + _NICK + ' \[' + _USERHOST + '\] has quit \[(?P.*?)\]' ), + ( LogTypes.TOPIC, _TS + r'-!- (' + _NICK + ' changed the topic of ' + _CHAN + ' to: (?P.*)|Topic unset by ' + _NICK2 + ' on ' + _CHAN2 + ')' ), + + ( LogTypes.SELF_NOTICE, _TS + r'\[notice\(' + _CHAN + '\)\] ' + _MSG ), + ( LogTypes.SELF_NICK, _TS + r'-!- You\'re now known as (?P\S+)' ), + + ( LogTypes.NETSPLIT_START, _TS + r'-!- Netsplit ' + _SRV1 + ' <-> ' + _SRV2 + ' quits: (?P[^(]+)( \(\+(?P\d+) more,\S+\))?'), + ( LogTypes.NETSPLIT_END, _TS + r'-!- Netsplit over, joins: (?P[^(]+)( \(\+(?P\d+) more\))?' ), + + ( 'DAY_CHANGED', r'--- Day changed (?P.+)' ), + ) + + # precompile + TYPE_REGEXES = [(type, re.compile(expr)) for type, expr in TYPE_EXPRS] + + def parse_line (self, channel, line, date, offset=None) : + """ + Parse a single line, and return the resulting LogLine, or None, to ignore the line. + + Uses self.TYPE_REGEXES to do the matching + """ + + # empty line + if not line : + return + + # look for match + match = type = None + + # test each type + for type, regex in self.TYPE_REGEXES : + # attempt to match + match = regex.match(line) + + # found, break + if match : + break + + # no match found? + if not match : + raise LogParseError(line, offset, "Line did not match any type") + + # match groups + groups = match.groupdict(None) + + # parse timestamp + if 'datetime' in groups : + # parse datetime using default asctime() format + dt = datetime.datetime.strptime(groups['datetime'], '%a %b %d %H:%M:%S %Y') + + elif 'timestamp' in groups : + # parse timestamp into naive datetime + dt = datetime.datetime.strptime(groups['timestamp'], self.timestamp_fmt) + + # override date? + if date : + dt = dt.replace(year=date.year, month=date.month, day=date.day) + + elif 'date' in groups : + # parse date-only datetime + dt = datetime.datetime.strptime(groups['date'], '%a %b %d %Y') + + else : + # no timestamp !? 
+ raise LogParseError(line, offset, "No timestamp") + + # now localize with timezone + dtz = self.tz.localize(dt) + + # channel, currently unused + channel_name = (groups.get('channel') or groups.get('channel2')) + + # source + if 'server1' in groups : + source = (None, None, groups.get('server1'), None) + + else : + source = (groups.get('nickname') or groups.get('nickname2'), groups.get('username'), groups.get('hostname'), groups.get('flags')) + + # target + if 'server2' in groups : + target = groups.get('server2') + + else : + target = groups.get('target') + + # data + if 'message' in groups : + data = groups['message'] + + elif 'mode' in groups : + data = groups['mode'] + + elif 'topic' in groups : + data = groups['topic'] + + elif 'nick_list' in groups : + # split into components + list = groups['nick_list'].split(', ') + + # additional count? + if 'count' in groups and groups['count'] : + list.append('+%d' % int(groups['count'])) + + # join + data = ' '.join(list) + + else : + data = None + + # custom types? + if type == 'DAY_CHANGED' : + # new date + date = dtz + + else : + # build+return (date, LogLine) + return date, LogLine(channel, offset, type, dtz, source, target, data) + + def parse_lines (self, channel, lines, date=None, starting_offset=None) : + """ + Parse the given lines, yielding LogEvents. + """ + + for offset, line in enumerate(lines) : + # offset? + if starting_offset : + offset = starting_offset + offset + + else : + offset = None + + # try and parse + try : + # get None or (date, line) + line_info = self.parse_line(channel, line, date, offset) + + # passthrough LogParseError's + except LogParseError : + raise + + # wrap other errors as LogParseError + except Exception, e : + raise LogParseError(line, offset, "Parsing line failed: %s" % e) + + else : + # nothing? + if not line_info : + continue + + # unpack, update date + date, line = line_info + + # yield + yield line + + diff -r 9c7769850195 -r 6db2527b67cf qmsk/irclogs/log_search.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/qmsk/irclogs/log_search.py Sun Sep 13 01:15:56 2009 +0300 @@ -0,0 +1,367 @@ +""" + Full-text searching of logs +""" + +import datetime, calendar, pytz +import os.path + +import HyperEstraier as hype + +import log_line, utils, config + +class LogSearchError (Exception) : + """ + General search error + """ + + pass + +class SearchIndexError (LogSearchError) : + """ + Error manipulating the index + """ + + def __init__ (self, msg, db) : + """ + Build the error from the given message + HyperEstraier.Database + """ + + super(SearchIndexError, self).__init__("%s: %s" % (msg, db.err_msg(db.error()))) + +class NoResultsFound (LogSearchError) : + """ + No results found + """ + + pass + +class LogSearchIndex (object) : + """ + An index on the logs for a group of channels. + + This uses Hyper Estraier to handle searching, whereby each log line is a document (yes, I have a powerful server). 
+
+        These log documents have the following attributes:
+            @uri                - "<channel>/<date>/<line>"
+            channel             - channel code
+            type                - the LogType id
+            timestamp           - UTC timestamp
+            source_nickname     - source nickname
+            source_username     - source username
+            source_hostname     - source hostname
+            source_chanflags    - source channel flags
+            target_nickname     - target nickname
+
+        Each document then has a single line of data, which is the log data message
+    """
+
+    def __init__ (self, channels, path, mode='r') :
+        """
+            Open the database at the given path, with the given mode:
+                First char:
+                    r       - read, error if not exists
+                    w       - write, create if not exists
+                    a       - write, error if not exists
+                    c       - create, error if exists
+
+                Additional chars:
+                    trunc   - truncate if exists
+                    +       - read as well as write
+                    ?       - non-blocking lock open, i.e. it fails if already open
+
+            Channels is the ChannelList.
+        """
+
+        # store
+        self.channels = channels
+        self.path = path
+        self.mode = mode
+
+        # check it does not already exist?
+        if mode == 'c' and os.path.exists(path) :
+            raise LogSearchError("Index already exists: %s" % (path, ))
+
+        # mapping of { mode -> flags }
+        mode_to_flag = {
+            'r':    hype.Database.DBREADER,
+            'w':    hype.Database.DBWRITER | hype.Database.DBCREAT,
+            'a':    hype.Database.DBWRITER,
+            'c':    hype.Database.DBWRITER | hype.Database.DBCREAT,
+        }
+
+        # flags to use, standard modes
+        flags = mode_to_flag[mode[0]]
+
+        # mode-flags
+        if '?' in mode :
+            # non-blocking locking
+            flags |= hype.Database.DBLCKNB
+
+        elif '+' in mode :
+            # read
+            flags |= hype.Database.DBREADER
+
+        elif 'trunc' in mode :
+            # truncate. Dangerous!
+            flags |= hype.Database.DBTRUNC
+
+        # make instance
+        self.db = hype.Database()
+
+        # open
+        if not self.db.open(path, flags) :
+            raise SearchIndexError("Index open failed: %s, mode=%s, flags=%#06x" % (path, mode, flags), self.db)
+
+    def close (self) :
+        """
+            Explicitly close the index; this is also done automatically on del
+        """
+
+        if not self.db.close() :
+            raise SearchIndexError("Index close failed", self.db)
+
+    def insert (self, channel, lines) :
+        """
+            Adds a sequence of LogLines from the given LogChannel to the index, and returns the number of added items
+        """
+
+        # count from zero
+        count = 0
+
+        # iterate
+        for line in lines :
+            # insert
+            self.insert_line(channel, line)
+
+            # count
+            count += 1
+
+        # return
+        return count
+
+    def insert_line (self, channel, line) :
+        """
+            Adds a single LogLine for the given LogChannel to the index
+        """
+
+        # validate the LogChannel
+        assert channel.id
+
+        # validate the LogLine
+        assert line.offset
+        assert line.timestamp
+
+        # create new document
+        doc = hype.Document()
+
+        # line date
+        date = line.timestamp.date()
+
+        # ensure that it's not 1900
+        assert date.year != 1900
+
+        # add URI
+        doc.add_attr('@uri', "%s/%s/%d" % (channel.id, date.strftime('%Y-%m-%d'), line.offset))
+
+        # add channel id
+        doc.add_attr('channel', channel.id)
+
+        # add type
+        doc.add_attr('type', str(line.type))
+
+        # add UTC timestamp
+        doc.add_attr('timestamp', str(utils.to_utc_timestamp(line.timestamp)))
+
+        # add source attribute?
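# ---- editor's example (sketch; not part of this changeset) ----
# The mode strings used around this changeset ('r', 'a?', 'c?', 'ctrunc')
# decompose as documented in __init__ above; the numeric values here merely
# stand in for the hype.Database flag constants.
DBREADER, DBWRITER, DBCREAT, DBLCKNB, DBTRUNC = 1, 2, 4, 8, 16
mode_to_flag = { 'r': DBREADER, 'w': DBWRITER | DBCREAT,
                 'a': DBWRITER, 'c': DBWRITER | DBCREAT }

def flags_for (mode) :
    flags = mode_to_flag[mode[0]]
    if '?' in mode :
        flags |= DBLCKNB        # non-blocking lock
    elif '+' in mode :
        flags |= DBREADER       # read as well as write
    elif 'trunc' in mode :
        flags |= DBTRUNC        # truncate. Dangerous!
    return flags

assert flags_for('ctrunc') == DBWRITER | DBCREAT | DBTRUNC
assert flags_for('a?') == DBWRITER | DBLCKNB
# ---- end example ----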
+ if line.source : + source_nickname, source_username, source_hostname, source_chanflags = line.source + + if source_nickname : + doc.add_attr('source_nickname', source_nickname.encode('utf8')) + + if source_username : + doc.add_attr('source_username', source_username.encode('utf8')) + + if source_hostname : + doc.add_attr('source_hostname', source_hostname.encode('utf8')) + + if source_chanflags : + doc.add_attr('source_chanflags', source_chanflags.encode('utf8')) + + # add target attributes? + if line.target : + target_nickname = line.target + + if target_nickname : + doc.add_attr('target_nickname', target_nickname.encode('utf8')) + + # add data + if line.data : + doc.add_text(line.data.encode('utf8')) + + # put, "clean up dispensable regions of the overwritten document" + if not self.db.put_doc(doc, hype.Database.PDCLEAN) : + raise SearchIndexError("put_doc", self.db) + + def search_cond (self, cond) : + """ + Search using a raw hype.Condition. Raises NoResultsFound if there aren't any results + """ + + # execute search, unused 'flags' arg stays zero + results = self.db.search(cond, 0) + + # no results? + if not results : + raise NoResultsFound() + + # iterate over the document IDs + for doc_id in results : + # load document, this throws an exception... + # option constants are hype.Database.GDNOATTR/GDNOTEXT + doc = self.db.get_doc(doc_id, 0) + + # load the attributes/text + channel = self.channels.lookup(doc.attr('channel')) + type = int(doc.attr('type')) + timestamp = utils.from_utc_timestamp(int(doc.attr('timestamp'))) + + # source + source = (doc.attr('source_nickname'), doc.attr('source_username'), doc.attr('source_hostname'), doc.attr('source_chanflags')) + + # target + target = doc.attr('target_nickname') + + # message text + message = doc.cat_texts().decode('utf8') + + # build+yield to as LogLine + yield log_line.LogLine(channel, None, type, timestamp, source, target, message) + + def search (self, options=None, channel=None, attrs=None, phrase=None, order=None, max=None, skip=None) : + """ + Search with flexible parameters + + options - bitmask of hype.Condition.* + channel - LogChannel object + attrs - raw attribute expressions + phrase - the search query phrase + order - order attribute expression + max - number of results to return + skip - number of results to skip + """ + + # build condition + cond = hype.Condition() + + if options : + # set options + cond.set_options(options) + + if channel : + # add channel attribute + cond.add_attr(("channel STREQ %s" % channel.id).encode('utf8')) + + if attrs : + # add attributes + for attr in attrs : + cond.add_attr(attr.encode('utf8')) + + if phrase : + # add phrase + cond.set_phrase(phrase.encode('utf8')) + + if order : + # set order + cond.set_order(order) + + if max : + # set max + cond.set_max(max) + + if skip : + # set skip + cond.set_skip(skip) + + # execute + return self.search_cond(cond) + + def search_simple (self, channel, query, count=None, offset=None, search_msg=True, search_nick=False) : + """ + Search for lines from the given channel for the given simple query. + + The search_* params define which attributes to search for (using fulltext search for the message, STROR for + attributes). 
+ """ + + # search attributes + attrs = [] + + # nickname target query + if search_nick : + attrs.append("source_nickname STRINC %s" % query) +# attrs.append("target_nickname STRINC %s" % query) + + # use search(), backwards + results = list(self.search( + # simplified phrase + options = hype.Condition.SIMPLE, + + # specific channel + channel = channel, + + # given phrase + phrase = query if search_msg else None, + + # attributes defined above + attrs = attrs, + + # order by timestamp, descending (backwards) + order = "timestamp NUMD", + + # count/offset + max = count, + skip = offset, + )) + + # reverse + return reversed(results) + + def list (self, channel, date, count=None, skip=None) : + """ + List all indexed log items for the given UTC date + """ + + # start/end dates + dt_start = datetime.datetime(date.year, date.month, date.day, 0, 0, 0, 0) + dt_end = datetime.datetime(date.year, date.month, date.day, 23, 23, 59, 999999) + + # search + return self.search( + # specific channel + channel = channel, + + # specific date range + attrs = [ + "timestamp NUMBT %d %d" % (utils.to_utc_timestamp(dt_start), utils.to_utc_timestamp(dt_end)) + ], + + # order correctly + order = "timestamp NUMA", + + # max count/offset + max = count, + skip = skip + ) + +def get_index () : + """ + Returns the default read-only index, suitable for searching + """ + + # XXX: no caching, just open it every time + _index = LogSearchIndex(config.LOG_CHANNELS, config.SEARCH_INDEX_PATH, 'r') + + # return + return _index + diff -r 9c7769850195 -r 6db2527b67cf qmsk/irclogs/log_source.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/qmsk/irclogs/log_source.py Sun Sep 13 01:15:56 2009 +0300 @@ -0,0 +1,679 @@ +""" + A source of IRC log files +""" + +import datetime, calendar, itertools, functools, math +import os, os.path, errno +import pytz + +import config, utils + +# a timedelta that represents one day +ONE_DAY = datetime.timedelta(days=1) + +class LogSourceDecoder (object) : + """ + Handles decoding of LogSource lines + """ + + def __init__ (self, encoding_list) : + """ + Will try each of the given (charset, errors) items in turn, until one succeeds + """ + + self.encoding_list = encoding_list + + def decode (self, line) : + """ + Decode the line of str() text into an unicode object + """ + + # list of errors encountered + error_list = [] + + # try each in turn + for charset, errors in self.encoding_list : + # trap UnicodeDecodeError to try with the next one + try : + return line.decode(charset, errors) + + except UnicodeDecodeError, e : + error_list.append("%s:%s - %s" % (charset, errors, e)) + continue + + # failure + raise UnicodeDecodeError("Failed to decode line: %r: %s" % (line, ', '.join(error_list))) + +class LogSource (object) : + """ + A collection of IRC logs for a specific target in some format. Provides the possibility to read specific events + """ + + def __init__ (self, decoder, channel=None) : + """ + The appropriate LogChannel must be given, as we need to be able to construct the LogLines. If it is not yet + known, then it can be given as None, and set later with bind_channel. + + Uses the given LogSourceDecoder to decode the lines. + """ + + self.channel = channel + self.decoder = decoder + + def bind_channel (self, channel) : + """ + Set this source's channel, where None was set before + """ + + assert not self.channel + + self.channel = channel + + def get_latest (self, count) : + """ + Yield the latest events, up to `count` of them. 
+ """ + + abstract + + def get_date (self, dt) : + """ + Get logs for the given date (as a datetime). + """ + + abstract + + def get_date_paged (self, dt, count, page=None) : + """ + Get the logs for a given date (as a datetime), divided into pages of count each. If page is given, the time + portion of the dt is ignored, and the lines for the given page are returned. Otherwise, if page is None, + then the lines for the page containing the given timestamp is returned. + + The return value is a (page, max, lines) tuple. + """ + + # how to act? + if page : + # constant skip + skip = (page - 1) * count + + else : + skip = None + + # go through the logs a page at a time + this_page = 1 + + # last line's timestamp + last_ts = None + + # found it yet? + found = False + + # count the full number of lines + line_count = 0 + + # collect lines + lines = [] + + # iterate using get_date + for line in self.get_date(dt) : + # count them + line_count += 1 + + # skip? + if skip : + skip -= 1 + continue + + # is this page all that we want/need? + if page or found : + # already full? + if len(lines) >= count : + continue + + # specfic timestamp + else : + # didn't find it in this page? + if len(lines) >= count : + # reset to next page + lines = [] + this_page += 1 + + # is dt between these two timestamps? + if (not last_ts or last_ts <= dt) and (dt <= line.timestamp) : + # found! + found = True + page = this_page + + else : + # keep looking + last_ts = line.timestamp + + # store line + lines.append(line) + + # calculate max_pages + max_pages = math.ceil(float(line_count) / count) + + # return + return (page, max_pages, lines) + + def get_month_days (self, dt) : + """ + Return an ordered sequence of dates, telling which days in the given month (as a datetime) have logs available. + """ + + abstract + + def get_modified (self, dt=None, after=None, until=None) : + """ + Returns a sequence of LogLines that may have been *modified* from their old values since the given datetime. + + If the datetime is not given, *all* lines are returned. + + If after is given, only lines from said date onwards will be returned, regardless of modification. + If until is given, only lines up to and including said date will be returned, regardless of modification. + + The LogLines should be in time order. + """ + + abstract + + def get_prev_date (self, dt) : + """ + Get the next distinct date of logs available preceeding the given date, or None + """ + + abstract + + def get_next_date (self, dt) : + """ + Get the next distinct date of logs following the given date, or None. + """ + + abstract + +class LogFile (object) : + """ + A file containing LogEvents + + XXX: modify to implement LogSource? + """ + + def __init__ (self, path, parser, decoder, channel=None, start_date=None, sep='\n') : + """ + Open the file at the given path, which contains lines as separated by the given separator. Lines are + decoded using the given LogSourceDecoder, and then parsed using the given parser, using the given date + as the initial date for this log's first line. 
+ + XXX: currently we assume start_date also for the end of the file + """ + + # store + self.channel = channel + self.path = path + self.parser = parser + self.start_date = start_date + self.decoder = decoder + self.sep = sep + + # open + self.file = open(path, 'rb') + + def __iter__ (self) : + """ + Yields a series of unicode lines, as read from the top of the file + """ + + # seek to beginning + self.file.seek(0) + + # iterate over lines, decoding them as well + return (self.decoder.decode(line.rstrip(self.sep)) for line in self.file) + + def read_full (self) : + """ + Reads all LogLines. The LogLines will have a valid offset. + """ + + # just use our __iter__ + return self.parser.parse_lines(self.channel, self, self.start_date, starting_offset=1) + + def read_from (self, dt) : + """ + Reads all LogLines from the given naive timestamp onwards + """ + + # start reading at beginning + events = self.read_full() + + # skip unwanted events + for event in events : + if event.timestamp < dt : + continue + + else : + # include this line as well + yield event + break + + # yield the rest as-is + for event in events : + yield event + + def read_until (self, dt) : + """ + Reads all LogLines up until the given naive timestamp + """ + + # start reading events at the beginning + events = self.read_full() + + # yield events until we hit the given timestamp + for event in events : + if event.timestamp <= dt : + yield event + + else : + break + + # ignore the rest + return + + def _read_blocks_reverse (self, blocksize=1024) : + """ + Yields blocks of file data in reverse order, starting at the end of the file + """ + + # seek to end of file + self.file.seek(0, os.SEEK_END) + + # read offset + # XXX: hack -1 to get rid of trailing newline + size = offset = self.file.tell() - 1 + + # do not try to read past the beginning of the file + while offset > 0: + # calc new offset + size + if offset > blocksize : + # full block + offset -= blocksize + read_size = blocksize + + else : + # partial block + read_size = offset + offset = 0 + + # seek to offset + self.file.seek(offset) + + # read the data we want + block = self.file.read(read_size) + + # sanity check + assert len(block) == read_size + + # yield + yield block + + def _read_lines_reverse (self) : + """ + Yields decoded lines from the end of the file, in reverse order. + """ + + # partial lines + buf = '' + + # read from end of file, a block at a time + for block in self._read_blocks_reverse() : + # add in our previous buf + buf = block + buf + + # split up lines + lines = buf.split(self.sep) + + # keep the first one as our buffer, as it's incomplete + buf = lines[0] + + # yield the rest a line at a time in reverse order... this looks weird, but that's how slicing works :) + # XXX: use something like islice, this has to build a slice object + for line in lines[:0:-1] : + yield self.decoder.decode(line) + + def read_latest (self, count) : + """ + Returns up to count events, from the end of the file, or less, if the file doesn't contain that many lines. + """ + + # the list of lines + lines = [] + + # start reading lines into lines + for line in self._read_lines_reverse() : + # append + lines.append(line) + + # done? + if len(lines) >= count : + break + + # decode in reverse order, using our starting date.... + # XXX: use lines[::-1] or reversed? 
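# ---- editor's example (sketch; not part of this changeset) ----
# The reverse-read strategy above, in miniature: walk the file in blocks from
# the end, keep the first (still incomplete) fragment as a carry buffer, and
# emit the remaining fragments last-to-first. Paths/sizes here are made up.
import os, tempfile

path = tempfile.mktemp()
open(path, 'wb').write('one\ntwo\nthree\n')

f = open(path, 'rb')
f.seek(0, os.SEEK_END)
buf, out, offset = '', [], f.tell() - 1    # -1 drops the trailing newline
blocksize = 4

while offset > 0 :
    read_size = min(blocksize, offset)
    offset -= read_size
    f.seek(offset)
    buf = f.read(read_size) + buf
    lines = buf.split('\n')
    buf = lines[0]                         # incomplete head, carry to next block
    out.extend(lines[:0:-1])               # complete tails, in reverse order

out.append(buf)
assert out == ['three', 'two', 'one']
# ---- end example ----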
+        # XXX: it may make more sense to parse in reverse order, using 'self.end_date' or something like that
+        return self.parser.parse_lines(self.channel, reversed(lines), self.start_date)
+
+class LogDirectory (LogSource) :
+    """
+        A directory containing a series of timestamped LogFiles
+    """
+
+    def __init__ (self, path, tz, parser, decoder, filename_fmt, channel=None) :
+        """
+            Load the logfiles at the given path, which are for the given LogChannel.
+
+            Decode the file lines using the given decoder; the files are named according to the date in the given
+            timezone and date format, and will be parsed using the given parser.
+        """
+
+        # store
+        self.channel = channel
+        self.path = path
+        self.tz = tz
+        self.parser = parser
+        self.decoder = decoder
+        self.filename_fmt = filename_fmt
+
+    def _get_logfile_date (self, d, load=True, mtime=False, ignore_missing=False) :
+        """
+            Get the logfile corresponding to the given naive date in our timezone.
+
+            If load is False, only test for the presence of the logfile, do not actually open it. If mtime is True,
+            then this returns the file's mtime instead.
+
+            Returns None if the logfile does not exist and ignore_missing is True; otherwise, the error propagates.
+        """
+
+        # format filename
+        filename = d.strftime(self.filename_fmt)
+
+        # build path
+        path = os.path.join(self.path, filename)
+
+        try :
+            if load :
+                # open+return the LogFile
+                return LogFile(path, self.parser, self.decoder, start_date=d, channel=self.channel)
+
+            elif mtime :
+                # stat
+                return utils.mtime(path)
+
+            else :
+                # test
+                return os.path.exists(path)
+
+        # XXX: move to LogFile
+        except IOError, e :
+            # return None for missing files
+            if e.errno == errno.ENOENT and ignore_missing :
+                return None
+
+            else :
+                raise
+
+    def _iter_logfile_dates (self, after=None, until=None, reverse=False) :
+        """
+            Yields a series of naive datetime objects representing the logfiles that are available, in time order.
+
+            Parameters:
+                after       only dates from said date onwards will be returned
+                until       only dates up to and including said date will be returned
+                reverse     the dates are returned in reverse order instead. Note that the meaning of after/until doesn't change
+        """
+
+        # convert timestamps to our timezone's dates
+        if after :
+            after = after.astimezone(self.tz).date()
+
+        if until :
+            until = until.astimezone(self.tz).date()
+
+        # listdir
+        filenames = os.listdir(self.path)
+
+        # sort
+        filenames.sort(reverse=reverse)
+
+        # iter files
+        for filename in filenames :
+            try :
+                # parse date
+                dt = self.tz.localize(datetime.datetime.strptime(filename, self.filename_fmt))
+                date = dt.date()
+
+            except :
+                # ignore
+                continue
+
+            else :
+                if (after and date < after) or (until and date > until) :
+                    # ignore
+                    continue
+
+                else :
+                    # yield
+                    yield dt
+
+    def _iter_date_reverse (self, dt=None) :
+        """
+            Yields an infinite series of naive date objects in our timezone, iterating backwards in time starting at the
+            given *datetime*, or the current date, if none given
+        """
+
+        # default to now
+        if not dt :
+            dtz = self.tz.localize(datetime.datetime.now())
+
+        else :
+            # convert to target timezone
+            dtz = dt.astimezone(self.tz)
+
+        # iterate unto infinity
+        while True :
+            # yield
+            yield dtz.date()
+
+            # one day sdrawkcab
+            dtz -= ONE_DAY
+
+    def _iter_logfile_reverse (self, dt=None, max_files=100) :
+        """
+            Yields a series of LogFile objects, iterating backwards in time starting at the given datetime, or the
+            current date, if none given.
+
+            Reads/probes at most max_files files.
+        """
+
+        # start counting at zero...
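# ---- editor's example (sketch; not part of this changeset) ----
# The filename <-> date mapping that _get_logfile_date and _iter_logfile_dates
# rely on is a plain strftime/strptime round-trip; the format here is made up.
import datetime, pytz

filename_fmt = '#channel.%Y-%m-%d.log'
tz = pytz.timezone('Europe/Helsinki')

d = datetime.date(2009, 9, 13)
filename = d.strftime(filename_fmt)
assert filename == '#channel.2009-09-13.log'

# ...and back again, localized the same way _iter_logfile_dates does it
dtz = tz.localize(datetime.datetime.strptime(filename, filename_fmt))
assert dtz.date() == d
# ---- end example ----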
+        file_count = 0
+
+        # have we found any files at all so far?
+        have_found = False
+
+        # iterate backwards over days
+        for day in self._iter_date_reverse(dt) :
+            # stop if we've handled enough files by now
+            if file_count > max_files :
+                break
+
+            # try and open the next logfile
+            logfile = None
+
+            file_count += 1
+            logfile = self._get_logfile_date(day, ignore_missing=True)
+
+            # no logfile there?
+            if not logfile :
+                # hit our limit?
+                if file_count > max_files :
+                    # if we didn't find any logfiles at all, terminate rudely
+                    if not have_found :
+                        raise Exception("No recent logfiles found")
+
+                    else :
+                        # stop looking, deal with what we've got
+                        return
+
+                else :
+                    # skip to next day
+                    continue
+
+            # mark have_found
+            have_found = True
+
+            # yield it
+            yield logfile
+
+    def get_latest (self, count) :
+        """
+            Uses _iter_logfile_reverse to read and yield the given number of lines from as many logfiles as needed
+        """
+
+        # read the events into here
+        lines = []
+
+        # start reading in those logfiles
+        for logfile in self._iter_logfile_reverse() :
+            # read the events
+            # XXX: use a queue
+            lines = list(logfile.read_latest(count)) + lines
+
+            # done?
+            if len(lines) >= count :
+                break
+
+        # return the events
+        return lines
+
+    def get_date (self, dt) :
+        """
+            A 'day' is considered to be a 24-hour period from 00:00:00 to 23:59:59. If the timezone of the given datetime
+            differs from our native timezone, this may involve lines from more than one logfile.
+        """
+
+        # begin/end of 24h period, in target timezone
+        dtz_begin = dt.replace(hour=0, minute=0, second=0).astimezone(self.tz)
+        dtz_end = dt.replace(hour=23, minute=59, second=59, microsecond=999999).astimezone(self.tz)
+
+        # as dates
+        d_begin = dtz_begin.date()
+        d_end = dtz_end.date()
+
+#        print
+#        print "LogDirectory.get_date - %s" % dt
+#        print "\t   %s %s" % (d_begin, dtz_begin)
+#        print "\t-> %s %s" % (d_end, dtz_end)
+
+        # if they're the same, just pull the full log for that date
+        if d_begin == d_end :
+            # open that log
+            logfile = self._get_logfile_date(d_begin)
+
+            # return the full data
+            return logfile.read_full()
+
+        # otherwise, we need to pull two partial logs
+        else :
+            # open both of them, but it's okay if we don't have the second one
+            f_begin = self._get_logfile_date(d_begin)
+            f_end = self._get_logfile_date(d_end, ignore_missing=True)
+
+            # chain together the two sources
+            return itertools.chain(
+                f_begin.read_from(dtz_begin),
+                f_end.read_until(dtz_end) if f_end else []
+            )
+
+    def _iter_month_days (self, month) :
+        """
+            Iterates over the days of a month as dt objects with time=0
+        """
+
+        # there's at most 31 days in a month...
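# ---- editor's example (sketch; not part of this changeset) ----
# Why get_date may need two logfiles: a UTC day maps to two different local
# dates when the log directory's timezone is ahead of UTC. Timezone is made up.
import datetime, pytz

log_tz = pytz.timezone('Europe/Helsinki')
day = pytz.utc.localize(datetime.datetime(2009, 9, 13))

dtz_begin = day.replace(hour=0, minute=0, second=0).astimezone(log_tz)
dtz_end = day.replace(hour=23, minute=59, second=59).astimezone(log_tz)

assert dtz_begin.date() == datetime.date(2009, 9, 13)   # 03:00 local (EEST)
assert dtz_end.date() == datetime.date(2009, 9, 14)     # 02:59 local, next day
# ---- end example ----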
+        for day in xrange(1, 32) :
+            try :
+                # try and build the datetime
+                dt = datetime.datetime(month.year, month.month, day)
+
+            except :
+                # stop
+                return
+
+            else :
+                # fix timezones
+                yield month.tzinfo.localize(dt)
+
+    def get_month_days (self, month) :
+        """
+            Returns a set of dates for which logfiles are available in the given datetime's month
+        """
+
+        # iterate over month's days
+        for dt in self._iter_month_days(month) :
+            # date in our target timezone
+            log_date = dt.astimezone(self.tz).date()
+
+            # test for it
+            if self._get_logfile_date(log_date, load=False, ignore_missing=True) :
+                # valid
+                yield dt.date()
+
+    def get_modified (self, dt=None, after=None, until=None) :
+        """
+            Returns the contents of all logfiles with mtimes past the given date
+        """
+
+        # iterate through all available logfiles in date order, as datetimes, from the given date on
+        for log_date in self._iter_logfile_dates(after, until) :
+            # compare against dt?
+            if dt :
+                # stat
+                mtime = self._get_logfile_date(log_date, load=False, mtime=True, ignore_missing=True)
+
+                # not modified?
+                if mtime < dt :
+                    # skip
+                    continue
+
+            # open
+            logfile = self._get_logfile_date(log_date)
+
+            # yield all lines
+            for line in logfile.read_full() :
+                yield line
+
+    def get_prev_date (self, dt) :
+        """
+            Just use _iter_logfile_dates
+        """
+
+        # use for to "iter" once
+        for log_date in self._iter_logfile_dates(until=dt - ONE_DAY, reverse=True) :
+            return log_date
+
+        else :
+            return None
+
+    def get_next_date (self, dt) :
+        """
+            Just use _iter_logfile_dates
+        """
+
+        # use for to "iter" once
+        for log_date in self._iter_logfile_dates(after=dt + ONE_DAY) :
+            return log_date
+
+        else :
+            return None
+
diff -r 9c7769850195 -r 6db2527b67cf qmsk/irclogs/preferences.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/qmsk/irclogs/preferences.py Sun Sep 13 01:15:56 2009 +0300
@@ -0,0 +1,534 @@
+"""
+    Handling user preferences
+"""
+
+import functools
+import Cookie
+
+from qmsk.web import urltree
+import utils
+
+class Preference (urltree.URLType) :
+    """
+        A specific preference
+    """
+
+    # the name to use
+    name = None
+
+    # the default value, as from parse()
+    default = None
+
+    def is_default (self, value) :
+        """
+            Returns True if the given post-value is the default value for this preference.
+
+            Defaults to just compare against self.default
+        """
+
+        return (value == self.default)
+
+    def process (self, preferences, value) :
+        """
+            Post-process this preference value. This can access the post-processed values of all other preferences that
+            were defined before this one in the list given to Preferences.
+
+            Defaults to just return value.
+        """
+
+        return value
+
+class RequestPreferences (object) :
+    """
+        Represents the specific preferences for some request
+    """
+
+    def __init__ (self, preferences, request, value_map=None) :
+        """
+            Initialize with the given Preferences object, http Request, and { key: value } mapping of raw preference values.
+
+            This will build a mapping of { name: pre-value } using Preference.parse/Preference.default, and then
+            post-process them into the final { name: value } mapping using Preference.process, in strict pref_list
+            order. Note that the process() method will only have access to those preferences processed before it was.
+        """
+
+        # store
+        self.preferences = preferences
+        self.request = request
+
+        # initialize
+        self.values = {}
+        self.set_cookies = {}
+
+        # initial value map
+        pre_values = {}
+
+        # load preferences
+        for pref in preferences.pref_list :
+            # got a value for it?
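# ---- editor's example (sketch; not part of this changeset) ----
# The shape of a concrete Preference: parse()/build() convert between the raw
# cookie string and the value object, and process() post-processes it. This is
# a standalone stand-in (the real ones also mix in a qmsk.web urltree.URLType).
class LinesPerPage (object) :
    name = 'lines_per_page'     # hypothetical preference
    default = 50

    def parse (self, value) :
        return int(value)

    def build (self, value) :
        return str(value)

    def process (self, preferences, value) :
        # clamp to something sane
        return min(value, 200)

pref = LinesPerPage()
assert pref.process(None, pref.parse('500')) == 200
# ---- end example ----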
+ if value_map and pref.name in value_map : + # get value + value = value_map[pref.name] + + # parse it + value = pref.parse(value) + + else : + # use default value + value = pref.default + + # add + pre_values[pref.name] = value + + # then post-process using Preferences.process(), in strict pref_list order + for pref in preferences.pref_list : + # store into self.values, so that pref.get(...) will be able to access the still-incomplete self.values + # dict + self.values[pref.name] = pref.process(self, pre_values[pref.name]) + + def _get_name (self, pref) : + """ + Look up a Preference's name, either by class, object or name. + """ + + # Preference -> name + if isinstance(pref, Preference) : + pref = pref.name + + return pref + + def pref (self, name) : + """ + Look up a Preference by object, name + """ + + # Preference + if isinstance(name, Preference) : + return name + + # Preference.name + elif isinstance(name, basestring) : + return self.preferences.pref_map[name] + + # XXX: class? + else : + assert False + + def get (self, pref) : + """ + Return the value for the given Preference, or preference name + """ + + # look up + return self.values[self._get_name(pref)] + + # support dict-access + __getitem__ = get + + def is_default (self, pref) : + """ + Returns True if the given preference is at its default value + """ + + # determine using Preference.is_default + return self.pref(pref).is_default(self.get(pref)) + + def build (self, pref) : + """ + Like 'get', but return the raw cookie value + """ + + # the Preference + pref = self.pref(pref) + + # build + return pref.build(self.get(pref)) + + def parse (self, pref, value=None) : + """ + Parse+process the raw value for some pref into a value object. + + Is the given raw value is None, this uses Preference.default + """ + + # lookup pref + pref = self.pref(pref) + + # build value + if value is not None : + # parse + value = pref.parse(value) + + else : + # default + value = pref.default + + # post-process + value = pref.process(self, value) + + # return + return value + + def set (self, name, value_obj=None) : + """ + Set a new value for the given preference (by str name). + + If value_obj is None, then the preference cookie is unset + """ + + # sanity-check to make sure we're not setting it twice... + assert name not in self.set_cookies + + # None? + if value_obj is not None : + # encode using the Preference object + value_str = self.preferences.pref_map[name].build(value_obj) + + else : + # unset as None + value_str = None + + # update in our dict + self.values[name] = value_obj + + # add to set_cookies + self.set_cookies[name] = value_str + +class Preferences (object) : + """ + Handle user preferences using cookies + """ + + def __init__ (self, pref_list) : + """ + Use the given list of Preference objects. + + The ordering of the given pref_list is significant for the process() implementation, as the + Preferences are process()'d in order. + """ + + # store + self.pref_list = pref_list + + # translate to mapping as well + self.pref_map = dict((pref.name, pref) for pref in pref_list) + + def load (self, request, ) : + """ + Load the set of preferences for the given request, and return as a { name -> value } dict + """ + + # the dict of values + values = {} + + # load the cookies + cookie_data = request.env.get('HTTP_COOKIE') + + # got any? 
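# ---- editor's example (sketch; not part of this changeset) ----
# What load() is about to do with the raw HTTP_COOKIE header: parse it into
# morsels whose .key/.value pairs become the raw preference value map.
import Cookie

cookies = Cookie.SimpleCookie('count=100; timezone=UTC')
values = dict((morsel.key, morsel.value) for morsel in cookies.itervalues())
assert values == { 'count': '100', 'timezone': 'UTC' }
# ---- end example ----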
+ if cookie_data : + # parse into a SimpleCookie + cookies = Cookie.SimpleCookie(cookie_data) + + # update the the values + values.update((morsel.key, morsel.value) for morsel in cookies.itervalues()) + + else : + cookies = None + + # apply any query parameters + for pref in self.pref_list : + # look for a query param + value = request.get_arg(pref.name) + + if value : + # override + values[pref.name] = value + + # build the RequestPreferences object + return cookies, RequestPreferences(self, request, values) + + def handler (self, *pref_list) : + """ + Intended to be used as a decorator for a request handler, this will load the give Preferences and pass + them to the wrapped handler as keyword arguments, in addition to any others given. + """ + + def _decorator (func) : + @functools.wraps(func) + def _handler (request, **args) : + # load preferences + cookies, prefs = self.load(request) + + # bind to request.prefs + # XXX: better way to do this? :/ + request.prefs = prefs + + # update args with new ones + args.update(((pref.name, prefs.get(pref)) for pref in pref_list)) + + # handle to get response + response = func(request, **args) + + # set cookies? + if prefs.set_cookies : + # default, empty, cookiejar + if not cookies : + cookies = Cookie.SimpleCookie('') + + # update cookies + for key, value in prefs.set_cookies.iteritems() : + if value is None : + assert False, "Not implemented yet..." + + else : + # set + cookies[key] = value + cookies[key]["path"] = config.PREF_COOKIE_PATH + cookies[key]["expires"] = config.PREF_COOKIE_EXPIRE_SECONDS + + # add headers + for morsel in cookies.itervalues() : + response.add_header('Set-cookie', morsel.OutputString()) + + return response + + # return wrapped handler + return _handler + + # return decorator... + return _decorator + +# now for our defined preferences.... +import pytz +import config + +class TimeFormat (urltree.URLStringType, Preference) : + """ + Time format + """ + + # set name + name = 'time_format' + + # default value + default = config.PREF_TIME_FMT_DEFAULT + +class DateFormat (urltree.URLStringType, Preference) : + """ + Date format + """ + + # set name + name = 'date_format' + + # default value + default = config.PREF_DATE_FMT_DEFAULT + +class TimezoneOffset (Preference) : + """ + If the DST-aware 'timezone' is missing, we can fallback to a fixed-offset timezone as detected by + Javascript. 
+ + This is read-only, and None by default + """ + + name = 'timezone_offset' + default = None + + def parse (self, offset) : + """ + Offset in minutes -> said minutes + """ + + return int(offset) + +class Timezone (Preference) : + """ + Timezone + """ + + # set name + name = 'timezone' + + # default is handled via process() + default = 'auto' + + # the list of available (value, name) options for use with helpers.select_options + OPTIONS = [('auto', "Autodetect")] + [(None, tz_name) for tz_name in pytz.common_timezones] + + def parse (self, name) : + """ + default -> default + tz_name -> pytz.timezone + """ + + # special-case for 'auto' + if name == self.default : + return self.default + + else : + return pytz.timezone(name) + + def is_default (self, tz) : + """ + True if it's a FixedOffsetTimezone or PREF_TIMEZONE_FALLBACK + """ + + return (isinstance(tz, utils.FixedOffsetTimezone) or tz == config.PREF_TIMEZONE_FALLBACK) + + def build (self, tz) : + """ + FixedOffsetTimezone -> None + pytz.timezone -> tz_name + """ + + # special-case for auto/no explicit timezone + if self.is_default(tz) : + return self.default + + else : + # pytz.timezone zone name + return tz.zone + + def process (self, prefs, tz) : + """ + If this timezone is given, simply build that. Otherwise, try and use TimezoneOffset, and if that fails, + just return the default. + + None -> FixedOffsetTimezone/PREF_TIMEZONE_FALLBACK + pytz.timezone -> pytz.timezone + """ + + # specific timezone set? + if tz != self.default : + return tz + + # fixed offset? + elif prefs[timezone_offset] is not None : + return utils.FixedOffsetTimezone(prefs[timezone_offset]) + + # default + else : + return config.PREF_TIMEZONE_FALLBACK + +class ImageFont (Preference) : + """ + Font for ImageFormatter + """ + + # set name + name = 'image_font' + + def __init__ (self, font_dict, default_name) : + """ + Use the given { name: (path, title) } dict and default the given name + """ + + self.font_dict = font_dict + self.default = self.parse(default_name) + + def parse (self, name) : + """ + name -> (name, path, title) + """ + + path, title = self.font_dict[name] + + return name, path, title + + def build (self, font_info) : + """ + (name, path, title) -> name + """ + + name, path, title = font_info + + return name + +class ImageFontSize (urltree.URLIntegerType, Preference) : + # set name, default + name = 'image_font_size' + default = config.PREF_IMAGE_FONT_SIZE_DEFAULT + + # XXX: constraints for valid values + +class Formatter (Preference) : + """ + LogFormatter to use + """ + + # set name + name = 'formatter' + + def __init__ (self, formatters, default) : + """ + Use the given { name -> class LogFormatter } dict and default (a LogFormatter class) + """ + + self.formatters = formatters + self.default = default + + def parse (self, fmt_name) : + """ + fmt_name -> class LogFormatter + """ + + return self.formatters[fmt_name] + + def build (self, fmt_cls) : + """ + class LogFormatter -> fmt_name + """ + + return fmt_cls.name + + def process (self, prefs, fmt_cls) : + """ + class LogFormatter -> LogFormatter(tz, time_fmt, image_font.path) + """ + + # time stuff + tz = prefs[timezone] + time_fmt = prefs[time_format] + + # font stuff + font_name, font_path, font_title = prefs[image_font] + font_size = prefs[image_font_size] + + return fmt_cls(tz, time_fmt, font_path, font_size) + +class Count (urltree.URLIntegerType, Preference) : + """ + Number of lines of log data to display per page + """ + + # set name + name = "count" + + # default + default = 
config.PREF_COUNT_DEFAULT + + def __init__ (self) : + super(Count, self).__init__(allow_negative=False, allow_zero=False, max=config.PREF_COUNT_MAX) + +# and then build the Preferences object +time_format = TimeFormat() +date_format = DateFormat() +timezone_offset = TimezoneOffset() +timezone = Timezone() +image_font = ImageFont(config.FORMATTER_IMAGE_FONTS, config.PREF_IMAGE_FONT_DEFAULT) +image_font_size = ImageFontSize() +formatter = Formatter(config.LOG_FORMATTERS, config.PREF_FORMATTER_DEFAULT) +count = Count() + +preferences = Preferences([ + time_format, + date_format, + timezone_offset, + timezone, + image_font, + image_font_size, + formatter, + count, +]) + diff -r 9c7769850195 -r 6db2527b67cf qmsk/irclogs/urls.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/qmsk/irclogs/urls.py Sun Sep 13 01:15:56 2009 +0300 @@ -0,0 +1,46 @@ + +""" + URL mapping for the irclogs.qmsk.net site +""" + +# urltree stuff +from qmsk.web import urltree + +# our own handlers +import handlers + +# for types +import utils + +# for configuration +import config + +# our URLTypes +types = dict( + # LogChannel + cid = utils.URLChannelName(config.LOG_CHANNELS.dict()), + + # datetime + date = utils.URLDateType(config.URL_DATE_FMT), + + # UTC timestamp + ts = utils.URLTimestampType(), +) + +# our URLConfig +urls = url = urltree.URLConfig(type_dict=types) + +# urls +index = url('/', handlers.index ) +preferences = url('/preferences', handlers.preferences_ ) +channel_select = url('/channel_select/?channel:cid', handlers.channel_select ) +channel = url('/channels/{channel:cid}', handlers.channel_last, count=20 ) +channel_last = url('/channels/{channel:cid}/last/{count:int=100}/{type=}', handlers.channel_last ) +channel_link = url('/channels/{channel:cid}/link/{timestamp:ts}/?type=', handlers.channel_link ) +channel_calendar = url('/channels/{channel:cid}/calendar/{year:int=0}/{month:int=0}', handlers.channel_calendar ) +channel_date = url('/channels/{channel:cid}/date/{date:date}/?page:int=1&type=', handlers.channel_date ) +channel_search = url('/channels/{channel:cid}/search/?q=&page:int=1&max:int=1&type=&t:list=', handlers.channel_search ) + +# mapper +mapper = urltree.URLTree(urls) + diff -r 9c7769850195 -r 6db2527b67cf qmsk/irclogs/utils.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/qmsk/irclogs/utils.py Sun Sep 13 01:15:56 2009 +0300 @@ -0,0 +1,137 @@ +""" + Miscellaneous things +""" + +import datetime, calendar, pytz +import os, errno + +from qmsk.web.urltree import URLType + +class URLChannelName (URLType) : + """ + Handle LogChannel names in URLs. 
Deals with instances of LogChannel + """ + + def __init__ (self, channels) : + """ + Use the given { name -> LogChannel } dict + """ + + self.channels = channels + + def parse (self, chan_name) : + """ + chan_name -> LogChannel + """ + + return self.channels[chan_name] + + def build (self, chan) : + """ + LogChannel -> chan_name + """ + + return chan.id + +class URLDateType (URLType) : + """ + Handle dates in URLs as naive datetime objects (with indeterminate time info) + """ + + def __init__ (self, date_fmt) : + """ + Format/parse dates using the given format + """ + + self.date_fmt = date_fmt + + def parse (self, date_str) : + """ + date_str -> naive datetime.datetime + """ + + return datetime.datetime.strptime(date_str, self.date_fmt) + + def build (self, date) : + """ + datetime.date -> date_str + """ + + return date.strftime(self.date_fmt) + +class URLTimestampType (URLType) : + """ + Handles an integer UNIX timestamp as an UTC datetime + """ + + def parse (self, timestamp_str) : + """ + timestamp_str -> pytz.utc datetime.datetime + """ + + return from_utc_timestamp(int(timestamp_str)) + + def build (self, dtz) : + """ + pytz.utc datetime.datetime -> timestamp_str + """ + + return str(to_utc_timestamp(dtz)) + +def from_utc_timestamp (timestamp) : + """ + Converts a UNIX timestamp into a datetime.datetime + """ + + return datetime.datetime.utcfromtimestamp(timestamp).replace(tzinfo=pytz.utc) + +def to_utc_timestamp (dt) : + """ + Converts a datetime.datetime into a UNIX timestamp + """ + + return calendar.timegm(dt.utctimetuple()) + +def mtime (path, ignore_missing=False) : + """ + Gets the mtime for the given path as an UTC datetime, or None, if the file doesn't exist and ignore_missing + """ + + try : + # stat + st = os.stat(path) + + # trap IOError + except os.error, e : + # ENOENT? + if ignore_missing and e.errno == errno.ENOENT : + return None + + else : + raise + + else : + # decode + return from_utc_timestamp(st.st_mtime) + +class FixedOffsetTimezone (pytz._FixedOffset) : + """ + A Fixed-offset timezone with no DST info, compatible with pytz. + + This is based on pytz._FixedOffset, but overrides dst() to return timedelta(0) + """ + + def __init__ (self, minutes) : + """ + Minutes is simply the offset from UTC in minutes, positive or negative, at most 24h. + """ + + pytz._FixedOffset.__init__(self, minutes) + + def dst (self, dt) : + """ + No DST info + """ + + return datetime.timedelta(0) + diff -r 9c7769850195 -r 6db2527b67cf qmsk/irclogs/version.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/qmsk/irclogs/version.py Sun Sep 13 01:15:56 2009 +0300 @@ -0,0 +1,106 @@ +""" + Figuring out the project version + + Currently this only supports mercurial +""" + +# only load this once +_VERSION = None + +def version_mercurial (path) : + """ + Returns a (branch, tags, parents, modified) tuple for the given repo's working copy + """ + + global _VERSION + + # cached? + if _VERSION : + return _VERSION + + # code adapted from mercurial.commands.identify + from mercurial import ui, hg, encoding + from mercurial.node import short + + # open the repo + repo = hg.repository(ui.ui(), path) + + # the working copy change context + ctx = repo[None] + + # branch + branch = encoding.tolocal(ctx.branch()) + + # map default -> None + if branch == 'default' : + branch = None + + # list of tags, without 'tip' tag + tags = [tag for tag in ctx.tags() if tag != 'tip'] + + # ctx's parents + parents = [short(p.node()) for p in ctx.parents()] + + # local modifications? 
+    modified = bool(ctx.files() + ctx.deleted())
+
+    # done
+    _VERSION = (branch, tags, parents, modified)
+    return _VERSION
+
+def version_string (path='.') :
+    """
+        Return a version string representing the version of the software at the given path.
+
+        Currently, this assumes that the given path points to a local Mercurial repo.
+    """
+
+    try :
+        # get info
+        branch, tags, parents, modified = version_mercurial(path)
+
+    except :
+        # XXX: ignore
+        raise
+
+    # tags: <tag> [ "-" <tag> [ ... ]]
+    if tags :
+        return '-'.join(tags)
+
+    # revision: <parent> [ "+" <parent> [ ... ]] [ "+" ]
+    revision = '+'.join(p for p in parents) + ('+' if modified else '')
+
+    if branch :
+        # branch: "(" <branch> ")" <revision>
+        return '(%s)%s' % (branch, revision)
+
+    else :
+        # plain: <revision>
+        return revision
+
+def version_link_hg (hgweb_url, path='.') :
+    """
+        Returns an HTML link to a hgweb page for this version
+    """
+
+    # URL for revision ID
+    rev_url = lambda rev: '<a href="%(url)s/rev/%(rev)s">%(rev)s</a>' % dict(url=hgweb_url, rev=rev)
+
+    # get info
+    branch, tags, parents, modified = version_mercurial(path)
+
+    # tags: <tag> [ "-" <tag> [ ... ]] [ "+" ]
+    if tags :
+        return '-'.join(rev_url(tag) for tag in tags) + ('+' if modified else '')
+
+    # revision: <parent> [ "+" <parent> [ ... ]] [ "+" ]
+    revision = '+'.join(rev_url(p) for p in parents) + ('+' if modified else '')
+
+    if branch :
+        # branch: "(" <branch> ")" <revision> [ "+" ]
+        return '(%s)%s' % (rev_url(branch), revision) + ('+' if modified else '')
+
+    else :
+        # plain: <revision>
+        return revision
+
diff -r 9c7769850195 -r 6db2527b67cf qmsk/irclogs/wsgi.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/qmsk/irclogs/wsgi.py Sun Sep 13 01:15:56 2009 +0300
@@ -0,0 +1,31 @@
+"""
+    Our custom WSGI application
+"""
+
+from qmsk.web import wsgi
+
+import urls, error
+
+# our custom app with custom error() method
+class Application (wsgi.Application) :
+    def __init__ (self) :
+        """
+            Construct wsgi.Application with our URLMapper
+        """
+
+        super(Application, self).__init__(urls.mapper)
+
+    def handle_error (self, exc_info, env, start_response) :
+        """
+            Use error.build_error and return that
+        """
+
+        # get info
+        status, content_type, body = error.build_error(env=env)
+
+        # headers
+        start_response(status, [('Content-type', content_type)], exc_info)
+
+        # body
+        return body
+
diff -r 9c7769850195 -r 6db2527b67cf scripts/search-index
--- a/scripts/search-index Sun Sep 13 00:49:55 2009 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,640 +0,0 @@
-#!/usr/bin/env python2.5
-
-"""
-    Tool for accessing the search index
-"""
-
-# XXX: fix path
-import sys; sys.path.insert(0, '.'); sys.path.insert(0, '..')
-
-import os, os.path, fcntl
-import datetime, pytz
-import optparse
-
-# configuration and the LogSearchIndex module
-import config, utils, log_search, channels
-
-def _open_index (options, open_mode) :
-    """
-        Opens the LogSearchIndex
-    """
-
-    return log_search.LogSearchIndex(config.LOG_CHANNELS, options.index_path, open_mode)
-
-
-def _open_index_and_channel (options, channel_name, open_mode) :
-    """
-        Opens+returns a LogSearchIndex and a LogChannel
-    """
-
-    # open the LogSearchIndex
-    index = _open_index(options, open_mode)
-
-    # open the channel
-    channel = config.LOG_CHANNELS.lookup(channel_name)
-
-    # return
-    return index, channel
-
-def _iter_insert_stats (index, channel, lines) :
-    """
-        Insert the given lines into the index.
-
-        Assumes the lines will be in time-order, and yields a series of (date, count) tuples for every date that lines
-        are inserted for
-    """
-
-    # last date
-    date = None
-
-    # count
-    count = 0
-
-    # iter lines
-    for line in lines :
-        # next day?
- if not date or line.timestamp.date() != date : - if date : - # yield stats - yield date, count - - # reset count - count = 0 - - # timestamp's date - date = line.timestamp.date() - - # insert - index.insert_line(channel, line) - - # count - count += 1 - - # final count? - if date and count : - yield date, count - -def _insert_lines (index, options, channel, lines) : - """ - Insert the given lines into the index. - - Assumes the lines will be in time-order, and prints out as status messages the date and count for the inserted lines - """ - - # iterate insert stats - for date, count in _iter_insert_stats(index, channel, lines) : - # output date header? - if not options.quiet : - print "%s: %s" % (date.strftime('%Y-%m-%d'), count), - -def _load_channel_date (index, options, channel, date) : - """ - Loads the logs for the given date from the channel's LogSource into the given LogSearchIndex - """ - - if not options.quiet : - print "Loading date for channel %s" % channel.id - - try : - # load lines for date - lines = channel.source.get_date(date) - - except Exception, e : - if not options.skip_missing : - raise - - if not options.quiet : - print "\tSkipped: %s" % (e, ) - - else : - # insert - _insert_lines(index, options, channel, lines) - -def _parse_date (options, date_str, tz=None, fmt='%Y-%m-%d') : - """ - Parse the given datetime, using the given timezone(defaults to options.tz) and format - """ - - # default tz - if not tz : - tz = options.timezone - - try : - # parse - return datetime.datetime.strptime(date_str, fmt).replace(tzinfo=tz) - - except Exception, e : - raise CommandError("[ERROR] Invalid date: %s: %s" % (date_str, e)) - -def _output_lines (options, lines) : - """ - Display the formatted LogLines - """ - - # display as plaintext - for line, txt_data in options.formatter.format_txt(lines, full_timestamps=True) : - print txt_data - -class CommandError (Exception) : - """ - Error with command-line arguments - """ - - pass - -def cmd_create (options) : - """ - Creates a new index - """ - - # open index - index = _open_index(options, 'ctrunc' if options.force else 'c') - - # that's all - pass - -def cmd_load (options, channel_name, *dates) : - """ - Loads the logs for a specific channel for the given dates (in terms of the channe logs' timezone) into the index - """ - - # open index/channel - index, channel = _open_index_and_channel(options, channel_name, 'c' if options.create else 'a') - - # handle each date - for date_str in dates : - # prase date - try : - date = _parse_date(options, date_str, channel.source.tz) - - # handle errors - except CommandError, e : - if options.skip_missing : - print "[ERROR] %s" % (date_name, e) - - else : - raise - - # otherwise, load - else : - _load_channel_date(index, options, channel, date) - -def cmd_load_month (options, channel_name, *months) : - """ - Loads the logs for a specific channel for the given months (in terms of the channel's timezone) into the index - """ - - # open index/channel - index, channel = _open_index_and_channel(options, channel_name, 'c' if options.create else 'a') - - # handle each date - for month_str in months : - # prase date - try : - month = _parse_date(options, month_str, channel.source.tz, '%Y-%m') - - # handle errors - except CommandError, e : - # skip? 
- if options.skip_missing : - if not options.quiet : - print "[ERROR] %s" % (date_name, e) - continue - - else : - raise - - # get the set of days - days = list(channel.source.get_month_days(month)) - - if not options.quiet : - print "Loading %d days of logs:" % (len(days)) - - # load each day - for date in days : - # convert to datetime - dt = datetime.datetime.combine(date, datetime.time(0)).replace(tzinfo=channel.source.tz) - - # load - _load_channel_date(index, options, channel, dt) - -def cmd_search (options, channel_name, query) : - """ - Search the index for events on a specific channel with the given query - """ - - # sanity-check - if options.create : - raise Exception("--create doesn't make sense for 'search'") - - # open index/channel - index, channel = _open_index_and_channel(options, channel_name, 'r') - - # search - lines = index.search_simple(channel, query) - - # display - _output_lines(options, lines) - -def cmd_list (options, channel_name, *dates) : - """ - List the indexed events for a specific date - """ - - # sanity-check - if options.create : - raise Exception("--create doesn't make sense for 'search'") - - # open index/channel - index, channel = _open_index_and_channel(options, channel_name, 'r') - - # ...for each date - for date_str in dates : - # parse date - date = _parse_date(options, date_str) - - # list - lines = index.list(channel, date) - - # display - _output_lines(options, lines) - -def _autoload_reset (options, channels) : - """ - Reset old autoload state - """ - - # warn - if not options.quiet : - print "[WARN] Resetting autoload state for: %s" % ', '.join(channel.id for channel in channels) - - # iter - for channel in channels : - # statefile path - statefile_path = os.path.join(options.autoload_state_path, 'chan-%s' % channel.id) - - # is it present? - if not os.path.exists(statefile_path) : - if not options.quiet : - print "[WARN] No statefile found at %s" % statefile_path - - else : - if not options.quiet : - print "\t%s: " % channel.id, - - # remove the statefile - os.remove(statefile_path) - - if not options.quiet : - print "OK" - -def cmd_autoload (options, *channel_names) : - """ - Automatically loads all channel logs that have not been indexed yet (by logfile mtime) - """ - - # open index, nonblocking - index = _open_index(options, 'c?' if options.create else 'a?') - - # default to all channels - if not channel_names : - channels = config.LOG_CHANNELS - - else : - channels = [config.LOG_CHANNELS.lookup(channel_name) for channel_name in channel_names] - - # reset autoload state? - if options.reset : - _autoload_reset(options, channels) - if not options.quiet : - print - - # iterate channels - for channel in channels : - if not options.quiet : - print "Channel %s:" % channel.id - - # no 'from' by default - after = None - - # path to our state file - statefile_path = os.path.join(options.autoload_state_path, 'chan-%s' % channel.id) - statefile_tmppath = statefile_path + '.tmp' - - # does it exist? - have_tmpfile = os.path.exists(statefile_tmppath) - - # do we have a tempfile from a previous crash? - if have_tmpfile and not options.ignore_resume : - # first, open it... - statefile_tmp = open(statefile_tmppath, 'r+') - - # ... 
then lock it - fcntl.lockf(statefile_tmp, fcntl.LOCK_EX | fcntl.LOCK_NB) - - # read after timestamp - after_str = statefile_tmp.read().rstrip() - - if after_str : - # parse timestamp - after = utils.from_utc_timestamp(int(after_str)) - - if not options.quiet : - print "\tContinuing earlier progress from %s" % after - - else : - # ignore - if not options.quiet : - print "\t[WARN] Ignoring empty temporary statefile" - - else : - # warn about old tmpfile that was ignored - if have_tmpfile and not options.quiet : - print "\t[WARN] Ignoring old tmpfile state" - - # open new tempfile - statefile_tmp = open(statefile_tmppath, 'w') - - # lock - fcntl.lockf(statefile_tmp, fcntl.LOCK_EX | fcntl.LOCK_NB) - - # override? - if options.reload : - # load all - mtime = None - - if not options.quiet : - print "\tForcing reload!" - - # stat for mtime - else : - # stat for mtime, None if unknown - mtime = utils.mtime(statefile_path, ignore_missing=True) - - if mtime and not options.quiet : - print "\tLast load time was %s" % mtime - - elif not options.quiet : - print "\t[WARN] No previous load state! Loading full logs" - - # only after some specific date? - if options.after : - # use unless read from tempfile - if not after : - after = options.after - - if not options.quiet : - print "\tOnly including dates from %s onwards" % after - - else : - if not options.quiet : - print "\t[WARN] Ignoring --from because we found a tempfile" - - # only up to some specific date? - if options.until : - until = options.until - - if not options.quiet : - print "\tOnly including dates up to (and including) %s" % until - else : - # default to now - until = None - - # get lines - lines = channel.source.get_modified(mtime, after, until) - - # insert - if not options.quiet : - print "\tLoading and inserting..." - print - - # iterate insert() per day to display info and update progress - for date, count in _iter_insert_stats(index, channel, lines) : - # output date header? - if not options.quiet : - print "\t%10s: %d" % (date.strftime('%Y-%m-%d'), count) - - # write temp state - statefile_tmp.seek(0) - statefile_tmp.write(str(utils.to_utc_timestamp(datetime.datetime.combine(date, datetime.time(0))))) - statefile_tmp.flush() - - # write autoload state - open(statefile_path, 'w').close() - - # close+delete tempfile - statefile_tmp.close() - os.remove(statefile_tmppath) - - if not options.quiet : - print - - # done - return - -def cmd_help (options, *args) : - """ - Help about commands - """ - - import inspect - - # general help stuff - options._parser.print_help() - - # specific command? 
+    if args :
+        # the command name
+        command, = args
+
+        # look up the cmd_* function
+        func = globals().get('cmd_%s' % command)
+
+        if not func :
+            raise CommandError("Unknown command: %s" % command)
+
+        # display info about the specific command
+        print
+        print "%s:" % command
+        print inspect.getdoc(func)
+
+    # general
+    else :
+        print
+        print "Available commands:"
+
+        # build list of all cmd_* objects
+        cmd_objects = [(name, obj) for name, obj in globals().iteritems() if name.startswith('cmd_') and inspect.isfunction(obj)]
+
+        # sort alphabetically
+        cmd_objects.sort()
+
+        # iterate through all cmd_* objects
+        for cmd_func_name, cmd_func in cmd_objects :
+            # remove the cmd_ prefix
+            cmd_name = cmd_func_name[4:]
+
+            # inspect
+            cmd_args, cmd_varargs, cmd_varkw, cmd_default = inspect.getargspec(cmd_func)
+            cmd_doc = inspect.getdoc(cmd_func)
+
+            # remove the "options" arg
+            cmd_args = cmd_args[1:]
+
+            # display
+            print "\t%10s %-30s : %s" % (cmd_name, inspect.formatargspec(cmd_args, cmd_varargs, None, cmd_default), cmd_doc)
+
+class MyOption (optparse.Option) :
+    """
+        Our custom types for optparse
+    """
+
+    def check_date (option, opt, value) :
+        """
+            Parse a date
+        """
+
+        try :
+            # parse
+            return datetime.datetime.strptime(value, '%Y-%m-%d')
+
+        # trap -> OptionValueError
+        except Exception, e :
+            raise optparse.OptionValueError("option %s: invalid date value: %r" % (opt, value))
+
+    def check_timezone (option, opt, value) :
+        """
+            Parse a timezone
+        """
+
+        try :
+            # parse
+            return pytz.timezone(value)
+
+        # trap -> OptionValueError
+        except Exception, e :
+            raise optparse.OptionValueError("option %s: invalid timezone: %r" % (opt, value))
+
+    def take_action (self, action, dest, opt, value, values, parser) :
+        """
+            Override take_action to handle date
+        """
+
+        if action == "parse_date" :
+            # get timezone
+            tz = values.timezone
+
+            # set timezone
+            value = value.replace(tzinfo=tz)
+
+            # store
+            return optparse.Option.take_action(self, 'store', dest, opt, value, values, parser)
+
+        else :
+            # default
+            return optparse.Option.take_action(self, action, dest, opt, value, values, parser)
+
+    TYPES = optparse.Option.TYPES + ('date', 'timezone')
+    TYPE_CHECKER = optparse.Option.TYPE_CHECKER.copy()
+    TYPE_CHECKER['date'] = check_date
+    TYPE_CHECKER['timezone'] = check_timezone
+    ACTIONS = optparse.Option.ACTIONS + ('parse_date', )
+    STORE_ACTIONS = optparse.Option.STORE_ACTIONS + ('parse_date', )
+    TYPED_ACTIONS = optparse.Option.TYPED_ACTIONS + ('parse_date', )
+
+def main (argv) :
+    """
+        Command-line main, with given argv
+    """
+
+    # define parser
+    parser = optparse.OptionParser(
+        usage           = "%prog [options] <command> [ <args> ... ]",
]", - add_help_option = False, - option_class = MyOption, - ) - - # general options # # # # - general = optparse.OptionGroup(parser, "General Options") - general.add_option('-h', "--help", dest="help", help="Show this help message and exit", - action="store_true" ) - - general.add_option( "--formatter", dest="formatter_name", help="LogFormatter to use", - metavar="FMT", type="choice", default=config.PREF_FORMATTER_DEFAULT.name, - choices=[fmt_name for fmt_name in config.LOG_FORMATTERS.iterkeys()] ) - - general.add_option( "--index", dest="index_path", help="Index database path", - metavar="PATH", default=config.SEARCH_INDEX_PATH ) - - general.add_option( "--timezone", dest="timezone", help="Timezone for output", - metavar="TZ", type="timezone", default=pytz.utc ) - - general.add_option( "--force", dest="force", help="Force dangerous operation", - action="store_true" ) - - general.add_option( "--quiet", dest="quiet", help="Supress status messages", - action="store_true" ) - parser.add_option_group(general) - - - # cmd_load options # # # # - load = optparse.OptionGroup(parser, "Load Options") - load.add_option( "--skip-missing", dest="skip_missing", help="Skip missing logfiles", - action="store_true" ) - - load.add_option( "--create", dest="create", help="Create index database", - action="store_true" ) - parser.add_option_group(load) - - - # cmd_autoload options # # # # - autoload = optparse.OptionGroup(parser, "Autoload Options") - autoload.add_option( "--autoload-state", dest="autoload_state_path", help="Path to autoload state dir", - metavar="PATH", default=config.SEARCH_AUTOINDEX_PATH) - - autoload.add_option( "--from", dest="after", help="Only autoload logfiles from the given date on", - metavar="DATE", type="date", action="parse_date", default=None ) - - autoload.add_option( "--until", dest="until", help="Only autoload logfiles up to (and including) the given date", - metavar="DATE", type="date", action="parse_date", default=None ) - - autoload.add_option( "--reload", dest="reload", help="Force reload lines", - action="store_true" ) - - autoload.add_option( "--reset", dest="reset", help="Reset old autload state", - action="store_true" ) - - autoload.add_option( "--ignore-resume", dest="ignore_resume", help="Do not try and resume interrupted autoload", - action="store_true" ) - parser.add_option_group(autoload) - - # parse - options, args = parser.parse_args(argv[1:]) - - # postprocess stuff - options._parser = parser - options.formatter = config.LOG_FORMATTERS[options.formatter_name](options.timezone, "%H:%M:%S", None, None) - - # special-case --help - if options.help : - return cmd_help(options, *args) - - # must have at least the command argument - if not args : - raise CommandError("Missing command") - - # pop command - command = args.pop(0) - - # get func - func = globals().get('cmd_%s' % command) - - # unknown command? 
+    if not func :
+        raise CommandError("Unknown command: %s" % command)
+
+    # call
+    func(options, *args)
+
+if __name__ == '__main__' :
+    try :
+        main(sys.argv)
+        sys.exit(0)
+
+    except CommandError, e :
+        print e
+        sys.exit(1)
+
diff -r 9c7769850195 -r 6db2527b67cf urls.py
--- a/urls.py	Sun Sep 13 00:49:55 2009 +0300
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,46 +0,0 @@
-
-"""
-    URL mapping for the irclogs.qmsk.net site
-"""
-
-# urltree stuff
-from qmsk.web import urltree
-
-# our own handlers
-import handlers
-
-# for types
-import utils
-
-# for configuration
-import config
-
-# our URLTypes
-types = dict(
-    # LogChannel
-    cid     = utils.URLChannelName(config.LOG_CHANNELS.dict()),
-
-    # datetime
-    date    = utils.URLDateType(config.URL_DATE_FMT),
-
-    # UTC timestamp
-    ts      = utils.URLTimestampType(),
-)
-
-# our URLConfig
-urls = url = urltree.URLConfig(type_dict=types)
-
-# urls
-index            = url('/',                                                           handlers.index )
-preferences      = url('/preferences',                                                handlers.preferences_ )
-channel_select   = url('/channel_select/?channel:cid',                                handlers.channel_select )
-channel          = url('/channels/{channel:cid}',                                     handlers.channel_last, count=20 )
-channel_last     = url('/channels/{channel:cid}/last/{count:int=100}/{type=}',        handlers.channel_last )
-channel_link     = url('/channels/{channel:cid}/link/{timestamp:ts}/?type=',          handlers.channel_link )
-channel_calendar = url('/channels/{channel:cid}/calendar/{year:int=0}/{month:int=0}', handlers.channel_calendar )
-channel_date     = url('/channels/{channel:cid}/date/{date:date}/?page:int=1&type=',  handlers.channel_date )
-channel_search   = url('/channels/{channel:cid}/search/?q=&page:int=1&max:int=1&type=&t:list=', handlers.channel_search )
-
-# mapper
-mapper = urltree.URLTree(urls)
-
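The URL patterns above mix literal path segments with typed placeholders ({channel:cid}, {count:int=100}) and typed query parameters (?page:int=1&type=). The actual parsing lives in qmsk.web.urltree, which this changeset does not touch, so the following is only a toy sketch of the placeholder idea — match_url, its regex, and its int-only type handling are illustrative assumptions rather than the library's API:

    import re

    def match_url (pattern, path) :
        """
            Toy matcher for the path-template half of a urls.py pattern.
        """

        # ignore the query-parameter spec ("?page:int=1&type="), if any
        pattern = pattern.split('?')[0]

        # compare segment by segment
        pat_segs = [seg for seg in pattern.split('/') if seg]
        path_segs = [seg for seg in path.split('/') if seg]

        if len(pat_segs) != len(path_segs) :
            return None

        args = {}

        for pat, val in zip(pat_segs, path_segs) :
            m = re.match(r'^\{(\w+)(?::(\w+))?(?:=(.*))?\}$', pat)

            if m :
                # a {name}, {name:type} or {name:type=default} placeholder
                name, type_, _default = m.groups()
                args[name] = int(val) if type_ == 'int' else val

            elif pat != val :
                # literal segment mismatch
                return None

        return args

    print match_url('/channels/{channel:cid}/last/{count:int=100}/{type=}',
            '/channels/foo/last/50/txt')
    # e.g. {'count': 50, 'type': 'txt', 'channel': 'foo'}

The real URLTree additionally resolves the cid/date/ts type names through the URLType instances defined above, applies the declared defaults, and dispatches to the bound handler.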
diff -r 9c7769850195 -r 6db2527b67cf utils.py
--- a/utils.py	Sun Sep 13 00:49:55 2009 +0300
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,137 +0,0 @@
-"""
-    Miscellaneous things
-"""
-
-import datetime, calendar, pytz
-import os, errno
-
-from qmsk.web.urltree import URLType
-
-class URLChannelName (URLType) :
-    """
-        Handle LogChannel names in URLs. Deals with instances of LogChannel
-    """
-
-    def __init__ (self, channels) :
-        """
-            Use the given { name -> LogChannel } dict
-        """
-
-        self.channels = channels
-
-    def parse (self, chan_name) :
-        """
-            chan_name -> LogChannel
-        """
-
-        return self.channels[chan_name]
-
-    def build (self, chan) :
-        """
-            LogChannel -> chan_name
-        """
-
-        return chan.id
-
-class URLDateType (URLType) :
-    """
-        Handle dates in URLs as naive datetime objects (with indeterminate time info)
-    """
-
-    def __init__ (self, date_fmt) :
-        """
-            Format/parse dates using the given format
-        """
-
-        self.date_fmt = date_fmt
-
-    def parse (self, date_str) :
-        """
-            date_str -> naive datetime.datetime
-        """
-
-        return datetime.datetime.strptime(date_str, self.date_fmt)
-
-    def build (self, date) :
-        """
-            datetime.date -> date_str
-        """
-
-        return date.strftime(self.date_fmt)
-
-class URLTimestampType (URLType) :
-    """
-        Handles an integer UNIX timestamp as a UTC datetime
-    """
-
-    def parse (self, timestamp_str) :
-        """
-            timestamp_str -> pytz.utc datetime.datetime
-        """
-
-        return from_utc_timestamp(int(timestamp_str))
-
-    def build (self, dtz) :
-        """
-            pytz.utc datetime.datetime -> timestamp_str
-        """
-
-        return str(to_utc_timestamp(dtz))
-
-def from_utc_timestamp (timestamp) :
-    """
-        Converts a UNIX timestamp into a datetime.datetime
-    """
-
-    return datetime.datetime.utcfromtimestamp(timestamp).replace(tzinfo=pytz.utc)
-
-def to_utc_timestamp (dt) :
-    """
-        Converts a datetime.datetime into a UNIX timestamp
-    """
-
-    return calendar.timegm(dt.utctimetuple())
-
-def mtime (path, ignore_missing=False) :
-    """
-        Gets the mtime for the given path as a UTC datetime, or None if the file doesn't exist and ignore_missing is set
-    """
-
-    try :
-        # stat
-        st = os.stat(path)
-
-    # trap os.error
-    except os.error, e :
-        # ENOENT?
-        if ignore_missing and e.errno == errno.ENOENT :
-            return None
-
-        else :
-            raise
-
-    else :
-        # decode
-        return from_utc_timestamp(st.st_mtime)
-
-class FixedOffsetTimezone (pytz._FixedOffset) :
-    """
-        A fixed-offset timezone with no DST info, compatible with pytz.
-
-        This is based on pytz._FixedOffset, but overrides dst() to return timedelta(0)
-    """
-
-    def __init__ (self, minutes) :
-        """
-            Minutes is simply the offset from UTC in minutes, positive or negative, at most 24h.
-        """
-
-        pytz._FixedOffset.__init__(self, minutes)
-
-    def dst (self, dt) :
-        """
-            No DST info
-        """
-
-        return datetime.timedelta(0)
-
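from_utc_timestamp() and to_utc_timestamp() above are inverses of one another down to whole seconds, which is what lets URLTimestampType round-trip datetimes through URLs. A quick self-contained check, with the two functions copied verbatim from the module (since the module itself is removed by this changeset):

    import datetime, calendar, pytz

    def from_utc_timestamp (timestamp) :
        return datetime.datetime.utcfromtimestamp(timestamp).replace(tzinfo=pytz.utc)

    def to_utc_timestamp (dt) :
        return calendar.timegm(dt.utctimetuple())

    # round-trips are lossless at whole-second resolution
    dt = datetime.datetime(2009, 9, 13, 1, 15, 56, tzinfo=pytz.utc)
    assert from_utc_timestamp(to_utc_timestamp(dt)) == dt

    # e.g. this changeset's own commit timestamp
    assert to_utc_timestamp(from_utc_timestamp(1252793756)) == 1252793756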
diff -r 9c7769850195 -r 6db2527b67cf version.py
--- a/version.py	Sun Sep 13 00:49:55 2009 +0300
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,106 +0,0 @@
-"""
-    Figuring out the project version
-
-    Currently this only supports mercurial
-"""
-
-# only load this once
-_VERSION = None
-
-def version_mercurial (path) :
-    """
-        Returns a (branch, tags, parents, modified) tuple for the given repo's working copy
-    """
-
-    global _VERSION
-
-    # cached?
-    if _VERSION :
-        return _VERSION
-
-    # code adapted from mercurial.commands.identify
-    from mercurial import ui, hg, encoding
-    from mercurial.node import short
-
-    # open the repo
-    repo = hg.repository(ui.ui(), path)
-
-    # the working copy change context
-    ctx = repo[None]
-
-    # branch
-    branch = encoding.tolocal(ctx.branch())
-
-    # map default -> None
-    if branch == 'default' :
-        branch = None
-
-    # list of tags, without the 'tip' tag
-    tags = [tag for tag in ctx.tags() if tag != 'tip']
-
-    # ctx's parents
-    parents = [short(p.node()) for p in ctx.parents()]
-
-    # local modifications?
-    modified = bool(ctx.files() + ctx.deleted())
-
-    # done
-    _VERSION = (branch, tags, parents, modified)
-    return _VERSION
-
-def version_string (path='.') :
-    """
-        Return a version string representing the version of the software at the given path.
-
-        Currently, this assumes that the given path points to a local Mercurial repo.
-    """
-
-    try :
-        # get info
-        branch, tags, parents, modified = version_mercurial(path)
-
-    except :
-        # XXX: should be ignored with some fallback, but re-raise for now
-        raise
-
-    # tags: <tag> [ "-" <tag> [ ... ]]
-    if tags :
-        return '-'.join(tags)
-
-    # revision: <parent> [ "+" <parent> [ ... ]] [ "+" ]
-    revision = '+'.join(p for p in parents) + ('+' if modified else '')
-
-    if branch :
-        # branch: "(" <branch> ")" <revision>
-        return '(%s)%s' % (branch, revision)
-
-    else :
-        # plain: <revision>
-        return revision
-
-def version_link_hg (hgweb_url, path='.') :
-    """
-        Returns a link to a hgweb page for this version
-    """
-
-    # URL for a revision ID
-    rev_url = lambda rev: '<a href="%(url)s/rev/%(rev)s">%(rev)s</a>' % dict(url=hgweb_url, rev=rev)
-
-    # get info
-    branch, tags, parents, modified = version_mercurial(path)
-
-    # tags: <tag> [ "-" <tag> [ ... ]] [ "+" ]
-    if tags :
-        return '-'.join(rev_url(tag) for tag in tags) + ('+' if modified else '')
-
-    # revision: <parent> [ "+" <parent> [ ... ]] [ "+" ]
-    revision = '+'.join(rev_url(p) for p in parents) + ('+' if modified else '')
-
-    if branch :
-        # branch: "(" <branch> ")" <revision>
-        return '(%s)%s' % (rev_url(branch), revision)
-
-    else :
-        # plain: <revision>
-        return revision
-
diff -r 9c7769850195 -r 6db2527b67cf wsgi.py
--- a/wsgi.py	Sun Sep 13 00:49:55 2009 +0300
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,31 +0,0 @@
-"""
-    Our custom WSGI application
-"""
-
-from qmsk.web import wsgi
-
-import urls, error
-
-# our custom app with a custom error() method
-class Application (wsgi.Application) :
-    def __init__ (self) :
-        """
-            Construct wsgi.Application with our URLMapper
-        """
-
-        super(Application, self).__init__(urls.mapper)
-
-    def handle_error (self, exc_info, env, start_response) :
-        """
-            Use error.build_error and return that
-        """
-
-        # get info
-        status, content_type, body = error.build_error(env=env)
-
-        # headers
-        start_response(status, [('Content-type', content_type)], exc_info)
-
-        # body
-        return body
-
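handle_error() above follows the plain WSGI error pattern: build a (status, content_type, body) triple for the current exception, hand exc_info through to start_response, and return the body. error.build_error itself is not part of this changeset, so the sketch below substitutes a minimal stand-in that assumes only the (status, content_type, body) return shape used above:

    import sys, traceback

    def build_error (env=None) :
        """
            Stand-in for error.build_error: report the current exception.
        """

        body = "500 Internal Server Error\n\n%s" % traceback.format_exc()

        return '500 Internal Server Error', 'text/plain', body

    def application (env, start_response) :
        try :
            raise RuntimeError("boom")  # stand-in for the real request handling

        except :
            # get info
            status, content_type, body = build_error(env=env)

            # headers, with exc_info passed through
            start_response(status, [('Content-type', content_type)], sys.exc_info())

            # body
            return [body]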