# HG changeset patch
# User Tero Marttila
# Date 1234045776 -7200
# Node ID 9585441a4bfbf4f64e5a7bc71531fa95b640cff5
# Parent 71ab68f31a1c98e5361d2da6f72dfc2f5dafce87
working basic logs stuff

diff -r 71ab68f31a1c -r 9585441a4bfb sites/irclogs.qmsk.net/channels.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sites/irclogs.qmsk.net/channels.py	Sun Feb 08 00:29:36 2009 +0200
@@ -0,0 +1,50 @@
+"""
+    Our list of LogChannels
+"""
+
+import pytz
+
+# for relpath
+import os.path
+
+from log_channel import LogChannel
+from log_source import LogDirectory
+
+relpath = lambda path : os.path.join(os.path.dirname(__file__), path)
+
+class ChannelList (object) :
+    """
+        The list of channels, and related methods
+    """
+
+    # the statically defined channel list
+    CHANNELS = {
+        'tycoon': LogChannel('tycoon', "OFTC", "#tycoon",
+            LogDirectory(relpath('logs/tycoon'), pytz.timezone('Europe/Helsinki'))
+        ),
+    }
+
+    def __init__ (self, channels) :
+        """
+            Initialize with the given channel dict
+        """
+
+        self.channels = channels
+
+    def lookup (self, channel_name) :
+        """
+            Looks up the LogChannel for the given name
+        """
+
+        return self.channels[channel_name]
+
+    def __iter__ (self) :
+        """
+            Iterate over our defined LogChannel objects
+        """
+
+        return self.channels.itervalues()
+
+# the global singleton ChannelList...
+channel_list = ChannelList(ChannelList.CHANNELS)
+
diff -r 71ab68f31a1c -r 9585441a4bfb sites/irclogs.qmsk.net/handlers.py
--- a/sites/irclogs.qmsk.net/handlers.py	Sat Feb 07 21:21:10 2009 +0200
+++ b/sites/irclogs.qmsk.net/handlers.py	Sun Feb 08 00:29:36 2009 +0200
@@ -2,8 +2,9 @@
     Our URL action handlers
 """
-from lib import template
+from lib import http, template
 
+# load templates from here
 templates = template.TemplateLoader("sites/irclogs.qmsk.net/templates")
 
 def index (request) :
     """
@@ -19,17 +20,21 @@
     """
 
     return templates.render_to_response("channel",
-        channel_name = channel,
+        channel = channel,
     )
 
     pass
 
-def channel_last (request, channel, lines, type) :
+def channel_last (request, channel, count, format) :
     """
         Display the last x lines of channel messages in various formats
     """
 
-    pass
+    if format == 'txt' :
+        return http.Response('\n'.join(channel.source.get_latest(count)), 'text/plain')
+
+    else :
+        raise http.ResponseError("Unknown filetype %r" % format)
 
 def channel_search (request, channel) :
     """
diff -r 71ab68f31a1c -r 9585441a4bfb sites/irclogs.qmsk.net/log_channel.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sites/irclogs.qmsk.net/log_channel.py	Sun Feb 08 00:29:36 2009 +0200
@@ -0,0 +1,19 @@
+"""
+    A channel represents a series of log events, stored in some log source
+"""
+
+class LogChannel (object) :
+    """
+        A single IRC channel, logged to some specific place
+    """
+
+    def __init__ (self, id, network, name, source) :
+        """
+            Initialize this channel from the given identifier key, network name, channel name, and LogSource
+        """
+
+        self.id = id
+        self.network = network
+        self.name = name
+        self.source = source
+
diff -r 71ab68f31a1c -r 9585441a4bfb sites/irclogs.qmsk.net/log_event.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sites/irclogs.qmsk.net/log_event.py	Sun Feb 08 00:29:36 2009 +0200
@@ -0,0 +1,21 @@
+"""
+    An IRC logfile consists of a series of events, a.k.a. "lines"
"lines" +""" + +class LogEvent (object) : + """ + An event on some specific channel + """ + + # the event ype + type = None + + # the UTC timestamp of the event + timestamp = None + + # the event source + source = None + + # associated data (message, etc) + data = None + diff -r 71ab68f31a1c -r 9585441a4bfb sites/irclogs.qmsk.net/log_source.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sites/irclogs.qmsk.net/log_source.py Sun Feb 08 00:29:36 2009 +0200 @@ -0,0 +1,227 @@ +""" + A source of IRC log files +""" + +import codecs +from datetime import date, datetime, timedelta +import pytz + +# for SEEK_*, errno +import os, errno + +class LogSource (object) : + """ + A collection of IRC logs for a specific target in some format. Provides the possibility to read specific events + """ + + def get_latest (self, count) : + """ + Yield the latest events, up to `count` of them. + """ + + abstract + +class LogFile (LogSource) : + """ + A file containing LogEvents + """ + + def __init__ (self, path, charset='utf-8', sep='\n') : + """ + Open the file at the given path, which contains data of the given codec, as lines separated by the given separator + """ + + # store + self.path = path + self.charset = charset + self.sep = sep + + # open + self.file = codecs.open(path, 'r', charset) + + def __iter__ (self) : + """ + Yields a series of lines, as read from the top of the file + """ + + # seek to beginning + self.file.seek(0) + + # iterate over lines + return iter(self.file) + + def get_latest (self, count) : + """ + Returns up to lines from the end of the file, or less, if the file doesn't contain that many lines + """ + + # the list of lines + lines = [] + + # seek to end of file + self.file.seek(0, os.SEEK_END) + + # read offset + # XXX; why -2 ? + offset = self.file.tell() - 2 + + # use this blocksize + BLOCKSIZE = 1024 + + # trailing data + buf = '' + + # read a block at a time, backwards + while count > 0 and offset >= 0: + # update offset + offset -= BLOCKSIZE + + # normalize to zero + if offset < 0 : + offset = 0 + + # seek backwards one block + self.file.seek(offset) + + # add the new block to our buffer + read_buf = self.file.read(BLOCKSIZE) + + # make sure we got the right amount of data + assert len(read_buf) == BLOCKSIZE, "read(%d) -> %d" % (BLOCKSIZE, len(read_buf)) + + # add in our previous buf + buf = read_buf + buf + + # split out lines + buf_lines = buf.split(self.sep) + + # keep the first one as our buffer, as it's incomplete + buf = buf_lines[0] + + # add up to count lines to our lines buffer + lines = buf_lines[1:count + 1] + lines + + # update count + count -= (len(buf_lines) - 1) + + # return the line list + return lines + +class LogDirectory (LogSource) : + """ + A directory containing a series of timestamped LogFiles + """ + + def __init__ (self, path, tz, charset='utf-8', filename_fmt='%Y-%m-%d') : + """ + Load the logfiles at the given path. + + The files contain data in the given charset, and are named according the the date in the given timezone and + date format. 
+ """ + + # store + self.path = path + self.tz = tz + self.charset = charset + self.filename_fmt = filename_fmt + + def _get_logfile_datetime (self, dt) : + """ + Get the logfile corresponding to the given datetime + """ + + # convert to target timezone + dtz = dt.astimezone(self.tz) + + # convert to date and use that + return self._get_logfile_date(dtz.date()) + + def _get_logfile_date (self, d) : + """ + Get the logfile corresponding to the given naive date in our timezone + """ + + # format filename + filename = d.strftime(self.filename_fmt) + + # build path + path = os.path.join(self.path, filename) + + # return the LogFile + return LogFile(path, self.charset) + + def _iter_backwards (self, dt=None) : + """ + Yields an infinite series of naive date objects in our timezone, iterating backwards in time starting at the + given *datetime*, or the the current date, if none given + """ + + # default to now + if not dt : + dt = datetime.now(pytz.utc) + + # convert to target timezone + dtz = dt.astimezone(self.tz) + + # our timedelta + ONE_DAY = timedelta(1) + + # iterate unto infinity + while True : + # yield + yield dtz.date() + + # one day sdrawkcab + dtz -= ONE_DAY + + def get_latest (self, count) : + """ + Uses _iter_backwards + _get_logfile_date to read the yield the given lines from as many logfiles as needed + """ + + # iterate backwards from now + day_iter = self._iter_backwards() + + # number of files read + files = 0 + + # only read up to 100 files or so + MAX_FILES = 100 + + # loop until done + while count > 0 : + logfile = None + + try : + # get next logfile + files += 1 + + # open + logfile = self._get_logfile_date(day_iter.next()) + + except IOError, e : + # skip nonexistant days if we haven't found any logs yet + if e.errno != errno.ENOENT : + raise + + if files > MAX_FILES : + raise Exception("No recent logfiles found") + + else : + # skip to next day + continue + + # yield lines + for line in logfile.get_latest(count) : + # yield while we still need to, otherwise, stop + if count > 0 : + # decrement + count -= 1 + + yield line + + else : + break + + diff -r 71ab68f31a1c -r 9585441a4bfb sites/irclogs.qmsk.net/logs/tycoon --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sites/irclogs.qmsk.net/logs/tycoon Sun Feb 08 00:29:36 2009 +0200 @@ -0,0 +1,1 @@ +/home/terom/backups/zapotek-irclogs/#tycoon \ No newline at end of file diff -r 71ab68f31a1c -r 9585441a4bfb sites/irclogs.qmsk.net/templates/channel.tmpl --- a/sites/irclogs.qmsk.net/templates/channel.tmpl Sat Feb 07 21:21:10 2009 +0200 +++ b/sites/irclogs.qmsk.net/templates/channel.tmpl Sun Feb 08 00:29:36 2009 +0200 @@ -1,2 +1,9 @@ -

Channel ${channel_name}

+

Channel ${channel.name}

+

Last 10 lines:

+
+% for line in channel.source.get_latest(10) :
+${line}
+% endfor
+
diff -r 71ab68f31a1c -r 9585441a4bfb sites/irclogs.qmsk.net/urls.py
--- a/sites/irclogs.qmsk.net/urls.py	Sat Feb 07 21:21:10 2009 +0200
+++ b/sites/irclogs.qmsk.net/urls.py	Sun Feb 08 00:29:36 2009 +0200
@@ -4,17 +4,34 @@
 """
 
 # urltree stuff
-from urltree import URL, URLTree
+from urltree import URLConfig, URL, URLTree
 
 # our own handlers
 import handlers
 
+# for types
+import channels
+
+# our URLConfig
+url_config = URLConfig(
+    type_dict = {
+        # lookup LogChannel
+        'cid': channels.channel_list.lookup
+    }
+)
+
+# shortcut for building an URL with our url_config
+def url (*args, **kwargs) :
+    return URL(url_config, *args, **kwargs)
+
 # urls
-index = URL( '/', handlers.index )
-channel_view = URL( '/channel/{channel}', handlers.channel_view )
-channel_last = URL( '/channel/{channel}/last/{count:int=100}/{format=html}', handlers.channel_last )
-channel_search = URL( '/channel/{channel}/search', handlers.channel_search )
+index = url('/', handlers.index )
+channel_view = url('/channels/{channel:cid}', handlers.channel_view )
+channel_last = url('/channels/{channel:cid}/last/{count:int=100}/{format=html}', handlers.channel_last )
+channel_search = url('/channels/{channel:cid}/search', handlers.channel_search )
 
 # mapper
-mapper = URLTree([index, channel_view, channel_last, channel_search])
+mapper = URLTree(
+    [index, channel_view, channel_last, channel_search]
+)
 
diff -r 71ab68f31a1c -r 9585441a4bfb sites/irclogs.qmsk.net/urltree.py
--- a/sites/irclogs.qmsk.net/urltree.py	Sat Feb 07 21:21:10 2009 +0200
+++ b/sites/irclogs.qmsk.net/urltree.py	Sun Feb 08 00:29:36 2009 +0200
@@ -40,7 +40,7 @@
     """
 
     @staticmethod
-    def parse (mask, defaults) :
+    def parse (mask, defaults, types) :
         """
             Parse the given label-segment, and return a *Label instance
         """
@@ -59,12 +59,8 @@
         # type
         type = match.group("type")
 
-        if type :
-            # XXX: resolve using eval() for now, should be a module or something
-            type = eval(type)
-
-        else :
-            type = str
+        # lookup type, None for default
+        type = types[type]
 
         # defaults?
         default = defaults.get(key)
@@ -220,7 +216,7 @@
             value = self.type(value)
 
         except Exception, e :
-            raise URLError("Bad value %r for type %s: %s" % (value, self.type.__name__, e))
+            raise URLError("Bad value %r for type %s: %s: %s" % (value, self.type.__name__, type(e).__name__, e))
 
         return LabelValue(self, value)
 
@@ -230,24 +226,55 @@
             ':%s' % (self.type.__name__ ) if self.type != str else '',
             '=%s' % (self.default, ) if self.default else '',
         )
-
+
+class URLConfig (object) :
+    """
+        Global configuration relevant to all URLs
+    """
+
+    # built-in type codes
+    BUILTIN_TYPES = {
+        # default
+        None : str,
+
+        # integer
+        'int' : int,
+    }
+
+    def __init__ (self, type_dict=None) :
+        """
+            Create an URLConfig for use with URL
+
+            If type_dict is given, it should be a mapping of type names -> callables, and they will be available for
+            type specifications in addition to the defaults.
+        """
+
+        # build our type_dict
+        self.type_dict = self.BUILTIN_TYPES.copy()
+
+        # apply the given type_dict
+        if type_dict :
+            self.type_dict.update(type_dict)
+
 class URL (object) :
     """
         Represents a specific URL
     """
-    def __init__ (self, url_mask, handler, **defaults) :
+
+    def __init__ (self, config, url_mask, handler, type_dict=None, **defaults) :
         """
-            Create an URL with the given url mask, handler, and default values
+            Create an URL using the given URLConfig, with the given url mask, handler, and default values.
""" # store + self.config = config self.url_mask = url_mask self.handler = handler self.defaults = defaults # build our labels - self.label_path = [Label.parse(mask, defaults) for mask in url_mask.split('/')] + self.label_path = [Label.parse(mask, defaults, config.type_dict) for mask in url_mask.split('/')] def get_label_path (self) : """