sites/irclogs.qmsk.net/log_source.py
changeset 46 185504387370
parent 45 e94ab812c0c8
child 47 3d59c9eeffaa
--- a/sites/irclogs.qmsk.net/log_source.py	Sun Feb 08 03:13:11 2009 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,231 +0,0 @@
-"""
-    A source of IRC log files
-"""
-
-import codecs
-from datetime import date, datetime, timedelta
-import pytz
-
-# for SEEK_*, errno
-import os, errno
-
-class LogSource (object) :
-    """
-        A collection of IRC logs for a specific target in some format. Provides the possibility to read specific events.
-
-        This is the abstract base class; concrete sources override get_latest.
-    """
-
-    def get_latest (self, count) :
-        """
-            Yield the latest events, up to `count` of them.
-
-            Must be overridden by subclasses; this base implementation always raises NotImplementedError.
-        """
-
-        # explicit abstract-method marker, so that a missing override fails with a meaningful error
-        raise NotImplementedError("%s.get_latest" % self.__class__.__name__)
-
-class LogFile (LogSource) :
-    """
-        A file containing LogEvents, one per line
-
-        The file is opened at construction time and stays open until close() is called.
-    """
-
-    # number of bytes fetched per backwards read in get_latest
-    BLOCKSIZE = 1024
-
-    def __init__ (self, path, charset='utf-8', sep='\n') :
-        """
-            Open the file at the given path, which contains data of the given codec, as lines separated by the given
-            separator.
-
-            Raises IOError if the file cannot be opened.
-        """
-
-        # store
-        self.path = path
-        self.charset = charset
-        self.sep = sep
-
-        # open; codecs.open always opens the underlying file in binary mode and decodes on read
-        self.file = codecs.open(path, 'r', charset)
-
-    def close (self) :
-        """
-            Close the underlying file. The LogFile cannot be read from afterwards.
-        """
-
-        self.file.close()
-
-    def __iter__ (self) :
-        """
-            Yields a series of lines, as read from the top of the file
-        """
-
-        # seek to beginning
-        self.file.seek(0)
-
-        # iterate over lines
-        return iter(self.file)
-
-    def get_latest (self, count) :
-        """
-            Returns up to <count> lines from the end of the file, or less, if the file doesn't contain that many lines.
-
-            The lines are returned as a list of decoded strings, oldest first, without the separator. A separator at
-            the very end of the file terminates the last line and does not produce an extra empty line.
-
-            The file is scanned backwards as raw bytes and only complete lines are decoded, so multi-byte characters
-            are never cut in half. This assumes that the encoded separator cannot occur inside the encoding of
-            another character, which holds for ASCII-compatible charsets such as UTF-8 and Latin-1.
-
-            The file position is rewound to the start of the file afterwards.
-        """
-
-        if count <= 0 :
-            return []
-
-        # the byte stream underneath the decoding wrapper, and the separator as it appears in that stream
-        raw = self.file.stream
-        sep = self.sep.encode(self.charset)
-
-        try :
-            # seek to end of file through the wrapper, so that it forgets any buffered data
-            self.file.seek(0, os.SEEK_END)
-            end = raw.tell()
-
-            # an empty file has no lines at all
-            if end == 0 :
-                return []
-
-            # leave a terminating separator out of the scan, it does not start a new line
-            if end >= len(sep) :
-                raw.seek(end - len(sep))
-
-                if raw.read(len(sep)) == sep :
-                    end -= len(sep)
-
-            # complete lines found so far, oldest first
-            lines = []
-
-            # the rear part of a line whose beginning has not been read yet (empty string of the byte type)
-            head = sep[:0]
-
-            # everything from offset to end has been read
-            offset = end
-
-            # read a block at a time, backwards, never reading the same byte twice
-            while offset > 0 and len(lines) < count :
-                start = max(0, offset - self.BLOCKSIZE)
-
-                raw.seek(start)
-                block = raw.read(offset - start)
-                offset = start
-
-                # the first piece may still be incomplete, the rest are whole lines
-                pieces = (block + head).split(sep)
-                head = pieces[0]
-                lines = pieces[1:] + lines
-
-            # the loop ended at the start of the file, so the leftover is the complete first line
-            if len(lines) < count :
-                lines.insert(0, head)
-
-        finally :
-            # leave the wrapper in a consistent state
-            self.file.seek(0)
-
-        # the last block may have contributed more lines than asked for
-        return [line.decode(self.charset) for line in lines[-count:]]
-
-class LogDirectory (LogSource) :
-    """
-        A directory containing a series of timestamped LogFiles
-
-        Each file is named by formatting a date in the directory's timezone with `filename_fmt`.
-    """
-
-    # get_latest stops skipping missing files once more than this many days have been tried
-    MAX_FILES = 100
-
-    def __init__ (self, path, tz, charset='utf-8', filename_fmt='%Y-%m-%d') :
-        """
-            Load the logfiles at the given path.
-
-            The files contain data in the given charset, and are named according to the date in the given timezone and
-            date format.
-
-            Nothing is opened here, the individual files are opened when they are needed.
-        """
-
-        # store
-        self.path = path
-        self.tz = tz
-        self.charset = charset
-        self.filename_fmt = filename_fmt
-
-    def _get_logfile_datetime (self, dt) :
-        """
-            Get the logfile corresponding to the given datetime, which is expected to be timezone-aware
-        """
-
-        # convert to target timezone
-        dtz = dt.astimezone(self.tz)
-
-        # convert to date and use that
-        return self._get_logfile_date(dtz.date())
-
-    def _get_logfile_date (self, d) :
-        """
-            Get the logfile corresponding to the given naive date in our timezone.
-
-            Raises IOError if the file cannot be opened, e.g. with errno ENOENT when there is no log for that date.
-        """
-
-        # format filename
-        filename = d.strftime(self.filename_fmt)
-
-        # build path
-        path = os.path.join(self.path, filename)
-
-        # return the LogFile
-        return LogFile(path, self.charset)
-
-    def _iter_backwards (self, dt=None) :
-        """
-            Yields an infinite series of naive date objects in our timezone, iterating backwards in time starting at the
-            given *datetime*, or the current date, if none given
-        """
-
-        # default to now
-        if dt is None :
-            dt = datetime.now(pytz.utc)
-
-        # convert to target timezone
-        dtz = dt.astimezone(self.tz)
-
-        # our timedelta
-        ONE_DAY = timedelta(1)
-
-        # iterate unto infinity
-        while True :
-            # yield
-            yield dtz.date()
-
-            # one day backwards
-            dtz -= ONE_DAY
-
-    def get_latest (self, count) :
-        """
-            Uses _iter_backwards + _get_logfile_date to yield up to `count` lines from as many logfiles as needed.
-
-            Files are visited from the current date backwards, and the lines of each file are yielded in the order
-            returned by that file's own get_latest, so lines of newer files come out before those of older files.
-
-            Days without a logfile are skipped. Once more than MAX_FILES days have been tried and yet another
-            missing day is hit, the search ends: quietly if some logfile was read already, or by raising an Exception
-            if no logfile was found at all. Any IOError other than a missing file is propagated.
-        """
-
-        # number of days tried so far
-        files = 0
-
-        # whether at least one logfile has been opened
-        found = False
-
-        # iterate backwards from now
-        for day in self._iter_backwards() :
-            # stop as soon as enough lines have been yielded
-            if count <= 0 :
-                break
-
-            files += 1
-
-            try :
-                # open
-                logfile = self._get_logfile_date(day)
-
-            except IOError, e :
-                # only a nonexistent file is expected here
-                if e.errno != errno.ENOENT :
-                    raise
-
-                if files <= self.MAX_FILES :
-                    # skip to next day
-                    continue
-
-                if found :
-                    # we ran out of history, what has been yielded so far is all there is
-                    break
-
-                raise Exception("No recent logfiles found")
-
-            found = True
-
-            # read the tail, then release the file handle right away instead of leaking one per day
-            try :
-                lines = logfile.get_latest(count)
-
-            finally :
-                logfile.file.close()
-
-            for line in lines :
-                # yield while we still need to, otherwise, stop
-                if count <= 0 :
-                    break
-
-                # decrement
-                count -= 1
-
-                yield line
-
-