--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sites/irclogs.qmsk.net/log_source.py Sun Feb 08 00:29:36 2009 +0200
@@ -0,0 +1,227 @@
+"""
+ A source of IRC log files
+"""
+
+import codecs
+from datetime import date, datetime, timedelta
+import pytz
+
+# for SEEK_*, errno
+import os, errno
+
class LogSource (object) :
    """
        A collection of IRC logs for a specific target in some format. Provides the possibility to read specific events
    """

    def get_latest (self, count) :
        """
            Yield the latest events, up to `count` of them.
        """

        # abstract: subclasses (e.g. a file- or directory-backed source) must override this
        raise NotImplementedError()
+
class LogFile (LogSource) :
    """
        A file containing LogEvents, stored as charset-decoded text with lines separated by a separator string
    """

    def __init__ (self, path, charset='utf-8', sep='\n') :
        """
            Open the file at the given path, which contains data of the given codec, as lines separated by the given separator
        """

        # store
        self.path = path
        self.charset = charset
        self.sep = sep

        # open
        # NOTE(review): the handle is held open for the object's lifetime and never explicitly closed
        self.file = codecs.open(path, 'r', charset)

    def __iter__ (self) :
        """
            Yields a series of lines, as read from the top of the file
        """

        # seek to beginning
        self.file.seek(0)

        # iterate over lines
        return iter(self.file)

    def get_latest (self, count) :
        """
            Returns up to <count> lines from the end of the file, in file order (oldest of them first), or fewer,
            if the file doesn't contain that many lines
        """

        # nothing wanted?
        if count <= 0 :
            return []

        # read this much at a time
        BLOCKSIZE = 1024

        # seek to end of file to find its size
        self.file.seek(0, os.SEEK_END)
        offset = self.file.tell()

        # the list of complete lines collected so far, in file order
        lines = []

        # carried data for the (possibly incomplete) first line of the region read so far
        buf = ''

        # true until we have processed the block at the very end of the file
        at_tail = True

        # read a block at a time, backwards, until we have enough lines or reach the start of the file
        # NOTE(review): seeking to arbitrary offsets in a codecs stream can land inside a multi-byte
        # sequence for variable-width charsets such as utf-8 — confirm log data is compatible
        while count > 0 and offset > 0 :
            # clamp the read to the start of the file, so short files and the first block never
            # overlap data we have already read
            read_size = min(BLOCKSIZE, offset)

            # step backwards
            offset -= read_size

            # seek to this block and prepend it to our carried data
            self.file.seek(offset)
            buf = self.file.read(read_size) + buf

            # ignore a single trailing separator at the very end of the file, so the final
            # terminated line doesn't produce a bogus empty entry
            if at_tail :
                at_tail = False

                if buf.endswith(self.sep) :
                    buf = buf[:-len(self.sep)]

            # split out lines
            buf_lines = buf.split(self.sep)

            # keep the first one as our carried buffer, as it may still be incomplete
            buf = buf_lines[0]

            # take the *latest* complete lines, up to count of them; they precede everything
            # collected so far, so prepend
            take = buf_lines[1:][-count:]
            lines = take + lines

            # update count
            count -= len(take)

        # once at the start of the file, the carried buffer is the complete first line
        if count > 0 and offset == 0 and buf :
            lines = [buf] + lines

        # return the line list
        return lines
+
class LogDirectory (LogSource) :
    """
        A directory containing a series of timestamped LogFiles
    """

    def __init__ (self, path, tz, charset='utf-8', filename_fmt='%Y-%m-%d') :
        """
            Load the logfiles at the given path.

            The files contain data in the given charset, and are named according to the date in the given timezone
            and date format.
        """

        # store
        self.path = path
        self.tz = tz
        self.charset = charset
        self.filename_fmt = filename_fmt

    def _get_logfile_datetime (self, dt) :
        """
            Get the logfile corresponding to the given (timezone-aware) datetime
        """

        # convert to target timezone
        dtz = dt.astimezone(self.tz)

        # convert to date and use that
        return self._get_logfile_date(dtz.date())

    def _get_logfile_date (self, d) :
        """
            Get the logfile corresponding to the given naive date in our timezone.

            Opening a nonexistent day's file raises IOError with errno.ENOENT.
        """

        # format filename from the date
        filename = d.strftime(self.filename_fmt)

        # build path
        path = os.path.join(self.path, filename)

        # return the LogFile
        return LogFile(path, self.charset)

    def _iter_backwards (self, dt=None) :
        """
            Yields an infinite series of naive date objects in our timezone, iterating backwards in time starting
            at the given *datetime*, or the current date, if none given
        """

        # default to now
        if dt is None :
            dt = datetime.now(pytz.utc)

        # convert to target timezone
        dtz = dt.astimezone(self.tz)

        # our timedelta
        # NOTE(review): plain timedelta arithmetic on an aware datetime does not renormalize across
        # DST transitions; for whole-day steps the resulting .date() should still be correct — confirm
        ONE_DAY = timedelta(1)

        # iterate unto infinity
        while True :
            # yield the current day
            yield dtz.date()

            # one day sdrawkcab
            dtz -= ONE_DAY

    def get_latest (self, count) :
        """
            Uses _iter_backwards + _get_logfile_date to yield up to the given number of lines, reading from as
            many logfiles as needed, newest file first
        """

        # iterate backwards from now
        day_iter = self._iter_backwards()

        # number of files tried so far
        files = 0

        # only read up to 100 files or so
        MAX_FILES = 100

        # loop until done
        while count > 0 :
            logfile = None

            try :
                # count this attempt at a logfile
                files += 1

                # open the next (older) day's logfile
                logfile = self._get_logfile_date(next(day_iter))

            except IOError as e :
                # propagate anything other than "no such file"
                if e.errno != errno.ENOENT :
                    raise

                # give up if we've scanned back too far without finding anything
                if files > MAX_FILES :
                    raise Exception("No recent logfiles found")

                else :
                    # skip to next day
                    continue

            # yield lines
            for line in logfile.get_latest(count) :
                # yield while we still need to, otherwise, stop
                if count > 0 :
                    # decrement
                    count -= 1

                    yield line

                else :
                    break
+
+