working basic logs stuff sites
author Tero Marttila <terom@fixme.fi>
Sun, 08 Feb 2009 00:29:36 +0200
branch sites
changeset 41 9585441a4bfb
parent 40 71ab68f31a1c
child 42 5a72c00c4ae4
working basic logs stuff
sites/irclogs.qmsk.net/channels.py
sites/irclogs.qmsk.net/handlers.py
sites/irclogs.qmsk.net/log_channel.py
sites/irclogs.qmsk.net/log_event.py
sites/irclogs.qmsk.net/log_source.py
sites/irclogs.qmsk.net/logs/tycoon
sites/irclogs.qmsk.net/templates/channel.tmpl
sites/irclogs.qmsk.net/urls.py
sites/irclogs.qmsk.net/urltree.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sites/irclogs.qmsk.net/channels.py	Sun Feb 08 00:29:36 2009 +0200
@@ -0,0 +1,50 @@
+"""
+    Our list of LogChannels
+"""
+
+import pytz
+
+# for relpath
+import os.path
+
+from log_channel import LogChannel
+from log_source import LogDirectory
+
+relpath = lambda path : os.path.join(os.path.dirname(__file__), path)
+
+class ChannelList (object) :
+    """
+        The list of channels, and related methods
+    """
+    
+    # the statically defined channel list
+    CHANNELS = {
+        'tycoon':   LogChannel('tycoon', "OFTC", "#tycoon", 
+            LogDirectory(relpath('logs/tycoon'), pytz.timezone('Europe/Helsinki'))
+        ),
+    }
+
+    def __init__ (self, channels) :
+        """
+            Initialize with the given channel dict
+        """
+
+        self.channels = channels
+
+    def lookup (self, channel_name) :
+        """
+            Looks up the LogChannel for the given name
+        """
+
+        return self.channels[channel_name]
+
+    def __iter__ (self) :
+        """
+            Iterate over our defined LogChannel objects
+        """
+
+        return self.channels.itervalues()
+
+# the global singleton ChannelList...
+channel_list = ChannelList(ChannelList.CHANNELS)
+
--- a/sites/irclogs.qmsk.net/handlers.py	Sat Feb 07 21:21:10 2009 +0200
+++ b/sites/irclogs.qmsk.net/handlers.py	Sun Feb 08 00:29:36 2009 +0200
@@ -2,8 +2,9 @@
     Our URL action handlers
 """
 
-from lib import template
+from lib import http, template
 
+# load templates from here
 templates = template.TemplateLoader("sites/irclogs.qmsk.net/templates")
 
 def index (request) :
@@ -19,17 +20,21 @@
     """
 
     return templates.render_to_response("channel",
-        channel_name        = channel,
+        channel         = channel,
     )
 
     pass
 
-def channel_last (request, channel, lines, type) :
+def channel_last (request, channel, count, format) :
     """
         Display the last x lines of channel messages in various formats
     """
 
-    pass
+    if format == 'txt' :
+        return http.Response('\n'.join(channel.source.get_latest(count)), 'text/plain')
+    
+    else :
+        raise http.ResponseError("Unknown filetype %r" % format)
 
 def channel_search (request, channel) :
     """
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sites/irclogs.qmsk.net/log_channel.py	Sun Feb 08 00:29:36 2009 +0200
@@ -0,0 +1,19 @@
+"""
+    A channel represents a series of log events, stored in some log source
+"""
+
+class LogChannel (object) :
+    """
+        A single IRC channel, logged to some specific place
+    """
+
+    def __init__ (self, id, network, name, source) :
+        """
+            Initialize this channel from the given identifier key, network name, channel name, and LogSource
+        """
+        
+        self.id = id
+        self.network = network
+        self.name = name
+        self.source = source
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sites/irclogs.qmsk.net/log_event.py	Sun Feb 08 00:29:36 2009 +0200
@@ -0,0 +1,21 @@
+"""
+    An IRC logfile consists of a series of events, a.k.a. "lines"
+"""
+
+class LogEvent (object) :
+    """
+        An event on some specific channel
+    """
+
+    # the event type
+    type = None
+
+    # the UTC timestamp of the event
+    timestamp = None
+
+    # the event source
+    source = None
+
+    # associated data (message, etc)
+    data = None
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sites/irclogs.qmsk.net/log_source.py	Sun Feb 08 00:29:36 2009 +0200
@@ -0,0 +1,227 @@
+"""
+    A source of IRC log files
+"""
+
+import codecs
+from datetime import date, datetime, timedelta
+import pytz
+
+# for SEEK_*, errno
+import os, errno
+
+class LogSource (object) :
+    """
+        A collection of IRC logs for a specific target in some format. Provides the possibility to read specific events
+    """
+    
+    def get_latest (self, count) :
+        """
+            Yield the latest events, up to `count` of them.
+        """
+
+        abstract
+
+class LogFile (LogSource) :
+    """
+        A file containing LogEvents
+    """
+
+    def __init__ (self, path, charset='utf-8', sep='\n') :
+        """
+            Open the file at the given path, which contains data of the given codec, as lines separated by the given separator
+        """
+        
+        # store
+        self.path = path
+        self.charset = charset
+        self.sep = sep
+
+        # open
+        self.file = codecs.open(path, 'r', charset)
+    
+    def __iter__ (self) :
+        """
+            Yields a series of lines, as read from the top of the file
+        """
+        
+        # seek to beginning
+        self.file.seek(0)
+
+        # iterate over lines
+        return iter(self.file)
+    
+    def get_latest (self, count) :
+        """
+            Returns up to <count> lines from the end of the file, or less, if the file doesn't contain that many lines
+        """
+
+        # the list of lines
+        lines = []
+
+        # seek to end of file
+        self.file.seek(0, os.SEEK_END)
+
+        # read offset
+        # XXX; why -2 ?
+        offset = self.file.tell() - 2
+
+        # use this blocksize
+        BLOCKSIZE = 1024
+
+        # trailing data
+        buf = ''
+
+        # read a block at a time, backwards
+        while  count > 0 and offset >= 0:
+            # update offset
+            offset -= BLOCKSIZE
+
+            # normalize to zero
+            if offset < 0 :
+                offset = 0
+
+            # seek backwards one block
+            self.file.seek(offset)
+
+            # add the new block to our buffer
+            read_buf = self.file.read(BLOCKSIZE)
+
+            # make sure we got the right amount of data
+            assert len(read_buf) == BLOCKSIZE, "read(%d) -> %d" % (BLOCKSIZE, len(read_buf))
+
+            # add in our previous buf
+            buf = read_buf + buf
+            
+            # split out lines
+            buf_lines = buf.split(self.sep)
+
+            # keep the first one as our buffer, as it's incomplete
+            buf = buf_lines[0]
+
+            # add up to count lines to our lines buffer
+            lines = buf_lines[1:count + 1] + lines
+
+            # update count
+            count -= (len(buf_lines) - 1)
+
+        # return the line list
+        return lines
+
+class LogDirectory (LogSource) :
+    """
+        A directory containing a series of timestamped LogFiles
+    """
+
+    def __init__ (self, path, tz, charset='utf-8', filename_fmt='%Y-%m-%d') :
+        """
+            Load the logfiles at the given path.
+            
+            The files contain data in the given charset, and are named according to the date in the given timezone and
+            date format.
+        """
+
+        # store
+        self.path = path
+        self.tz = tz
+        self.charset = charset
+        self.filename_fmt = filename_fmt
+
+    def _get_logfile_datetime (self, dt) :
+        """
+            Get the logfile corresponding to the given datetime
+        """
+
+        # convert to target timezone
+        dtz = dt.astimezone(self.tz)
+        
+        # convert to date and use that
+        return self._get_logfile_date(dtz.date())
+
+    def _get_logfile_date (self, d) :
+        """
+            Get the logfile corresponding to the given naive date in our timezone
+        """
+
+        # format filename
+        filename = d.strftime(self.filename_fmt)
+
+        # build path
+        path = os.path.join(self.path, filename)
+
+        # return the LogFile
+        return LogFile(path, self.charset)
+    
+    def _iter_backwards (self, dt=None) :
+        """
+            Yields an infinite series of naive date objects in our timezone, iterating backwards in time starting at the
+            given *datetime*, or the current date, if none given
+        """
+        
+        # default to now
+        if not dt :
+            dt = datetime.now(pytz.utc)
+        
+        # convert to target timezone
+        dtz = dt.astimezone(self.tz)
+
+        # our timedelta
+        ONE_DAY = timedelta(1)
+        
+        # iterate unto infinity
+        while True :
+            # yield
+            yield dtz.date()
+            
+            # one day sdrawkcab
+            dtz -= ONE_DAY
+    
+    def get_latest (self, count) :
+        """
+            Uses _iter_backwards + _get_logfile_date to read and yield up to `count` lines from as many logfiles as needed
+        """
+        
+        # iterate backwards from now
+        day_iter = self._iter_backwards()
+
+        # number of files read
+        files = 0
+
+        # only read up to 100 files or so
+        MAX_FILES = 100
+        
+        # loop until done
+        while count > 0 :
+            logfile = None
+
+            try :
+                # get next logfile
+                files += 1
+                
+                # open
+                logfile = self._get_logfile_date(day_iter.next())
+            
+            except IOError, e :
+                # skip nonexistent days if we haven't found any logs yet
+                if e.errno != errno.ENOENT :
+                    raise
+
+                if files > MAX_FILES :
+                    raise Exception("No recent logfiles found")
+                
+                else :
+                    # skip to next day
+                    continue
+
+            # yield lines
+            for line in logfile.get_latest(count) :
+                # yield while we still need to, otherwise, stop
+                if count > 0 :
+                    # decrement
+                    count -= 1
+ 
+                    yield line
+            
+                else :
+                    break
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sites/irclogs.qmsk.net/logs/tycoon	Sun Feb 08 00:29:36 2009 +0200
@@ -0,0 +1,1 @@
+/home/terom/backups/zapotek-irclogs/#tycoon
\ No newline at end of file
--- a/sites/irclogs.qmsk.net/templates/channel.tmpl	Sat Feb 07 21:21:10 2009 +0200
+++ b/sites/irclogs.qmsk.net/templates/channel.tmpl	Sun Feb 08 00:29:36 2009 +0200
@@ -1,2 +1,9 @@
-<h1>Channel ${channel_name}</h1>
+<h1>Channel ${channel.name}</h1>
 
+<h2>Last 10 lines:</h2>
+<pre>
+% for line in channel.source.get_latest(10) :
+${line}
+% endfor
+</pre>
+
--- a/sites/irclogs.qmsk.net/urls.py	Sat Feb 07 21:21:10 2009 +0200
+++ b/sites/irclogs.qmsk.net/urls.py	Sun Feb 08 00:29:36 2009 +0200
@@ -4,17 +4,34 @@
 """
 
 # urltree stuff
-from urltree import URL, URLTree
+from urltree import URLConfig, URL, URLTree
 
 # our own handlers
 import handlers
 
+# for types
+import channels 
+
+# our URLConfig
+url_config = URLConfig(
+    type_dict   = { 
+        # lookup LogChannel
+        'cid': channels.channel_list.lookup 
+    }
+)
+
+# shortcut for building an URL with our url_config
+def url (*args, **kwargs) :
+    return URL(url_config, *args, **kwargs)
+
 # urls
-index           = URL(  '/',                                                        handlers.index                  )
-channel_view    = URL(  '/channel/{channel}',                                       handlers.channel_view           )
-channel_last    = URL(  '/channel/{channel}/last/{count:int=100}/{format=html}',    handlers.channel_last           )
-channel_search  = URL(  '/channel/{channel}/search',                                handlers.channel_search         )
+index           = url('/',                                                              handlers.index                  )
+channel_view    = url('/channels/{channel:cid}',                                        handlers.channel_view           )
+channel_last    = url('/channels/{channel:cid}/last/{count:int=100}/{format=html}',     handlers.channel_last           )
+channel_search  = url('/channels/{channel:cid}/search',                                 handlers.channel_search         )
 
 # mapper
-mapper = URLTree([index, channel_view, channel_last, channel_search])
+mapper = URLTree(
+    [index, channel_view, channel_last, channel_search]
+)
 
--- a/sites/irclogs.qmsk.net/urltree.py	Sat Feb 07 21:21:10 2009 +0200
+++ b/sites/irclogs.qmsk.net/urltree.py	Sun Feb 08 00:29:36 2009 +0200
@@ -40,7 +40,7 @@
     """
 
     @staticmethod
-    def parse (mask, defaults) :
+    def parse (mask, defaults, types) :
         """
             Parse the given label-segment, and return a *Label instance
         """
@@ -59,12 +59,8 @@
             # type
             type = match.group("type")
             
-            if type :
-                # XXX: resolve using eval() for now, should be a module or something
-                type = eval(type)
-
-            else :
-                type = str
+            # lookup type, None for default
+            type = types[type]
 
             # defaults?
             default = defaults.get(key)
@@ -220,7 +216,7 @@
                 value = self.type(value)
 
             except Exception, e :
-                raise URLError("Bad value %r for type %s: %s" % (value, self.type.__name__, e))
+                raise URLError("Bad value %r for type %s: %s: %s" % (value, self.type.__name__, type(e).__name__, e))
 
             return LabelValue(self, value)
 
@@ -230,24 +226,55 @@
             ':%s' % (self.type.__name__ ) if self.type != str else '',
             '=%s' % (self.default, ) if self.default else '',
         )
-            
+
+class URLConfig (object) :
+    """
+        Global configuration relevant to all URLs
+    """
+
+    # built-in type codes
+    BUILTIN_TYPES = {
+        # default
+        None    : str,
+
+        # integer
+        'int'   : int,
+    }
+
+    def __init__ (self, type_dict=None) :
+        """
+            Create an URLConfig for use with URL
+
+            If type_dict is given, it should be a mapping of type names -> callables, and they will be available for
+            type specifications in addition to the defaults.
+        """
+
+        # build our type_dict
+        self.type_dict = self.BUILTIN_TYPES.copy()
+        
+        # apply the given type_dict
+        if type_dict :
+            self.type_dict.update(type_dict)
+
 class URL (object) :
     """
         Represents a specific URL
     """
 
-    def __init__ (self, url_mask, handler, **defaults) :
+
+    def __init__ (self, config, url_mask, handler, type_dict=None, **defaults) :
         """
-            Create an URL with the given url mask, handler, and default values
+            Create an URL using the given URLConfig, with the given url mask, handler, and default values.
         """
 
         # store
+        self.config = config
         self.url_mask = url_mask
         self.handler = handler
         self.defaults = defaults
 
         # build our labels
-        self.label_path = [Label.parse(mask, defaults) for mask in url_mask.split('/')]
+        self.label_path = [Label.parse(mask, defaults, config.type_dict) for mask in url_mask.split('/')]
         
     def get_label_path (self) :
         """