--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sites/irclogs.qmsk.net/channels.py Sun Feb 08 00:29:36 2009 +0200
@@ -0,0 +1,50 @@
+"""
+ Our list of LogChannels
+"""
+
+import pytz
+
+# for relpath
+import os.path
+
+from log_channel import LogChannel
+from log_source import LogDirectory
+
+relpath = lambda path : os.path.join(os.path.dirname(__file__), path)
+
+class ChannelList (object) :
+ """
+ The list of channels, and related methods
+ """
+
+ # the statically defined channel list
+ CHANNELS = {
+ 'tycoon': LogChannel('tycoon', "OFTC", "#tycoon",
+ LogDirectory(relpath('logs/tycoon'), pytz.timezone('Europe/Helsinki'))
+ ),
+ }
+
+ def __init__ (self, channels) :
+ """
+ Initialize with the given channel dict
+ """
+
+ self.channels = channels
+
+ def lookup (self, channel_name) :
+ """
+ Looks up the LogChannel for the given name
+ """
+
+ return self.channels[channel_name]
+
+ def __iter__ (self) :
+ """
+ Iterate over our defined LogChannel objects
+ """
+
+ return self.channels.itervalues()
+
+# the global singleton ChannelList...
+channel_list = ChannelList(ChannelList.CHANNELS)
+
--- a/sites/irclogs.qmsk.net/handlers.py Sat Feb 07 21:21:10 2009 +0200
+++ b/sites/irclogs.qmsk.net/handlers.py Sun Feb 08 00:29:36 2009 +0200
@@ -2,8 +2,9 @@
Our URL action handlers
"""
-from lib import template
+from lib import http, template
+# load templates from here
templates = template.TemplateLoader("sites/irclogs.qmsk.net/templates")
def index (request) :
@@ -19,17 +20,21 @@
"""
return templates.render_to_response("channel",
- channel_name = channel,
+ channel = channel,
)
pass
-def channel_last (request, channel, lines, type) :
+def channel_last (request, channel, count, format) :
"""
Display the last x lines of channel messages in various formats
"""
- pass
+ if format == 'txt' :
+ return http.Response('\n'.join(channel.source.get_latest(count)), 'text/plain')
+
+ else :
+ raise http.ResponseError("Unknown filetype %r" % format)
def channel_search (request, channel) :
"""
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sites/irclogs.qmsk.net/log_channel.py Sun Feb 08 00:29:36 2009 +0200
@@ -0,0 +1,19 @@
+"""
+ A channel represents a series of log events, stored in some log source
+"""
+
+class LogChannel (object) :
+ """
+ A single IRC channel, logged to some specific place
+ """
+
+ def __init__ (self, id, network, name, source) :
+ """
+ Initialize this channel from the given identifier key, network name, channel name, and LogSource
+ """
+
+ self.id = id
+ self.network = network
+ self.name = name
+ self.source = source
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sites/irclogs.qmsk.net/log_event.py Sun Feb 08 00:29:36 2009 +0200
@@ -0,0 +1,21 @@
+"""
+ An IRC logfile consists of a series of events, a.k.a. "lines"
+"""
+
+class LogEvent (object) :
+ """
+ An event on some specific channel
+ """
+
+    # the event type
+ type = None
+
+ # the UTC timestamp of the event
+ timestamp = None
+
+ # the event source
+ source = None
+
+ # associated data (message, etc)
+ data = None
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sites/irclogs.qmsk.net/log_source.py Sun Feb 08 00:29:36 2009 +0200
@@ -0,0 +1,227 @@
+"""
+ A source of IRC log files
+"""
+
+import codecs
+from datetime import date, datetime, timedelta
+import pytz
+
+# for SEEK_*, errno
+import os, errno
+
+class LogSource (object) :
+ """
+ A collection of IRC logs for a specific target in some format. Provides the possibility to read specific events
+ """
+
+ def get_latest (self, count) :
+ """
+ Yield the latest events, up to `count` of them.
+ """
+
+ abstract
+
+class LogFile (LogSource) :
+ """
+ A file containing LogEvents
+ """
+
+ def __init__ (self, path, charset='utf-8', sep='\n') :
+ """
+ Open the file at the given path, which contains data of the given codec, as lines separated by the given separator
+ """
+
+ # store
+ self.path = path
+ self.charset = charset
+ self.sep = sep
+
+ # open
+ self.file = codecs.open(path, 'r', charset)
+
+ def __iter__ (self) :
+ """
+ Yields a series of lines, as read from the top of the file
+ """
+
+ # seek to beginning
+ self.file.seek(0)
+
+ # iterate over lines
+ return iter(self.file)
+
+ def get_latest (self, count) :
+ """
+ Returns up to <count> lines from the end of the file, or less, if the file doesn't contain that many lines
+ """
+
+ # the list of lines
+ lines = []
+
+ # seek to end of file
+ self.file.seek(0, os.SEEK_END)
+
+ # read offset
+        # XXX: why -2? presumably to skip a trailing separator at EOF — verify for single-char sep
+ offset = self.file.tell() - 2
+
+ # use this blocksize
+ BLOCKSIZE = 1024
+
+ # trailing data
+ buf = ''
+
+ # read a block at a time, backwards
+ while count > 0 and offset >= 0:
+ # update offset
+ offset -= BLOCKSIZE
+
+ # normalize to zero
+ if offset < 0 :
+ offset = 0
+
+ # seek backwards one block
+ self.file.seek(offset)
+
+ # add the new block to our buffer
+ read_buf = self.file.read(BLOCKSIZE)
+
+ # make sure we got the right amount of data
+ assert len(read_buf) == BLOCKSIZE, "read(%d) -> %d" % (BLOCKSIZE, len(read_buf))
+
+ # add in our previous buf
+ buf = read_buf + buf
+
+ # split out lines
+ buf_lines = buf.split(self.sep)
+
+ # keep the first one as our buffer, as it's incomplete
+ buf = buf_lines[0]
+
+ # add up to count lines to our lines buffer
+ lines = buf_lines[1:count + 1] + lines
+
+ # update count
+ count -= (len(buf_lines) - 1)
+
+ # return the line list
+ return lines
+
+class LogDirectory (LogSource) :
+ """
+ A directory containing a series of timestamped LogFiles
+ """
+
+ def __init__ (self, path, tz, charset='utf-8', filename_fmt='%Y-%m-%d') :
+ """
+ Load the logfiles at the given path.
+
+        The files contain data in the given charset, and are named according to the date in the given timezone and
+ date format.
+ """
+
+ # store
+ self.path = path
+ self.tz = tz
+ self.charset = charset
+ self.filename_fmt = filename_fmt
+
+ def _get_logfile_datetime (self, dt) :
+ """
+ Get the logfile corresponding to the given datetime
+ """
+
+ # convert to target timezone
+ dtz = dt.astimezone(self.tz)
+
+ # convert to date and use that
+ return self._get_logfile_date(dtz.date())
+
+ def _get_logfile_date (self, d) :
+ """
+ Get the logfile corresponding to the given naive date in our timezone
+ """
+
+ # format filename
+ filename = d.strftime(self.filename_fmt)
+
+ # build path
+ path = os.path.join(self.path, filename)
+
+ # return the LogFile
+ return LogFile(path, self.charset)
+
+ def _iter_backwards (self, dt=None) :
+ """
+ Yields an infinite series of naive date objects in our timezone, iterating backwards in time starting at the
+        given *datetime*, or the current date, if none given
+ """
+
+ # default to now
+ if not dt :
+ dt = datetime.now(pytz.utc)
+
+ # convert to target timezone
+ dtz = dt.astimezone(self.tz)
+
+ # our timedelta
+ ONE_DAY = timedelta(1)
+
+ # iterate unto infinity
+ while True :
+ # yield
+ yield dtz.date()
+
+ # one day sdrawkcab
+ dtz -= ONE_DAY
+
+ def get_latest (self, count) :
+ """
+        Uses _iter_backwards + _get_logfile_date to read and yield the given number of lines from as many logfiles as needed
+ """
+
+ # iterate backwards from now
+ day_iter = self._iter_backwards()
+
+ # number of files read
+ files = 0
+
+ # only read up to 100 files or so
+ MAX_FILES = 100
+
+ # loop until done
+ while count > 0 :
+ logfile = None
+
+ try :
+ # get next logfile
+ files += 1
+
+ # open
+ logfile = self._get_logfile_date(day_iter.next())
+
+ except IOError, e :
+            # skip nonexistent days if we haven't found any logs yet
+ if e.errno != errno.ENOENT :
+ raise
+
+ if files > MAX_FILES :
+ raise Exception("No recent logfiles found")
+
+ else :
+ # skip to next day
+ continue
+
+ # yield lines
+ for line in logfile.get_latest(count) :
+ # yield while we still need to, otherwise, stop
+ if count > 0 :
+ # decrement
+ count -= 1
+
+ yield line
+
+ else :
+ break
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sites/irclogs.qmsk.net/logs/tycoon Sun Feb 08 00:29:36 2009 +0200
@@ -0,0 +1,1 @@
+/home/terom/backups/zapotek-irclogs/#tycoon
\ No newline at end of file
--- a/sites/irclogs.qmsk.net/templates/channel.tmpl Sat Feb 07 21:21:10 2009 +0200
+++ b/sites/irclogs.qmsk.net/templates/channel.tmpl Sun Feb 08 00:29:36 2009 +0200
@@ -1,2 +1,9 @@
-<h1>Channel ${channel_name}</h1>
+<h1>Channel ${channel.name}</h1>
+<h2>Last 10 lines:</h2>
+<pre>
+% for line in channel.source.get_latest(10) :
+${line}
+% endfor
+</pre>
+
--- a/sites/irclogs.qmsk.net/urls.py Sat Feb 07 21:21:10 2009 +0200
+++ b/sites/irclogs.qmsk.net/urls.py Sun Feb 08 00:29:36 2009 +0200
@@ -4,17 +4,34 @@
"""
# urltree stuff
-from urltree import URL, URLTree
+from urltree import URLConfig, URL, URLTree
# our own handlers
import handlers
+# for types
+import channels
+
+# our URLConfig
+url_config = URLConfig(
+ type_dict = {
+ # lookup LogChannel
+ 'cid': channels.channel_list.lookup
+ }
+)
+
+# shortcut for building an URL with our url_config
+def url (*args, **kwargs) :
+ return URL(url_config, *args, **kwargs)
+
# urls
-index = URL( '/', handlers.index )
-channel_view = URL( '/channel/{channel}', handlers.channel_view )
-channel_last = URL( '/channel/{channel}/last/{count:int=100}/{format=html}', handlers.channel_last )
-channel_search = URL( '/channel/{channel}/search', handlers.channel_search )
+index = url('/', handlers.index )
+channel_view = url('/channels/{channel:cid}', handlers.channel_view )
+channel_last = url('/channels/{channel:cid}/last/{count:int=100}/{format=html}', handlers.channel_last )
+channel_search = url('/channels/{channel:cid}/search', handlers.channel_search )
# mapper
-mapper = URLTree([index, channel_view, channel_last, channel_search])
+mapper = URLTree(
+ [index, channel_view, channel_last, channel_search]
+)
--- a/sites/irclogs.qmsk.net/urltree.py Sat Feb 07 21:21:10 2009 +0200
+++ b/sites/irclogs.qmsk.net/urltree.py Sun Feb 08 00:29:36 2009 +0200
@@ -40,7 +40,7 @@
"""
@staticmethod
- def parse (mask, defaults) :
+ def parse (mask, defaults, types) :
"""
Parse the given label-segment, and return a *Label instance
"""
@@ -59,12 +59,8 @@
# type
type = match.group("type")
- if type :
- # XXX: resolve using eval() for now, should be a module or something
- type = eval(type)
-
- else :
- type = str
+ # lookup type, None for default
+ type = types[type]
# defaults?
default = defaults.get(key)
@@ -220,7 +216,7 @@
value = self.type(value)
except Exception, e :
- raise URLError("Bad value %r for type %s: %s" % (value, self.type.__name__, e))
+ raise URLError("Bad value %r for type %s: %s: %s" % (value, self.type.__name__, type(e).__name__, e))
return LabelValue(self, value)
@@ -230,24 +226,55 @@
':%s' % (self.type.__name__ ) if self.type != str else '',
'=%s' % (self.default, ) if self.default else '',
)
-
+
+class URLConfig (object) :
+ """
+ Global configuration relevant to all URLs
+ """
+
+ # built-in type codes
+ BUILTIN_TYPES = {
+ # default
+ None : str,
+
+ # integer
+ 'int' : int,
+ }
+
+ def __init__ (self, type_dict=None) :
+ """
+ Create an URLConfig for use with URL
+
+ If type_dict is given, it should be a mapping of type names -> callables, and they will be available for
+ type specifications in addition to the defaults.
+ """
+
+ # build our type_dict
+ self.type_dict = self.BUILTIN_TYPES.copy()
+
+ # apply the given type_dict
+ if type_dict :
+ self.type_dict.update(type_dict)
+
class URL (object) :
"""
Represents a specific URL
"""
- def __init__ (self, url_mask, handler, **defaults) :
+
+ def __init__ (self, config, url_mask, handler, type_dict=None, **defaults) :
"""
- Create an URL with the given url mask, handler, and default values
+ Create an URL using the given URLConfig, with the given url mask, handler, and default values.
"""
# store
+ self.config = config
self.url_mask = url_mask
self.handler = handler
self.defaults = defaults
# build our labels
- self.label_path = [Label.parse(mask, defaults) for mask in url_mask.split('/')]
+ self.label_path = [Label.parse(mask, defaults, config.type_dict) for mask in url_mask.split('/')]
def get_label_path (self) :
"""