--- a/sites/irclogs.qmsk.net/log_source.py Sun Feb 08 03:13:11 2009 +0200
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,231 +0,0 @@
-"""
- A source of IRC log files
-"""
-
-import codecs
-from datetime import date, datetime, timedelta
-import pytz
-
-# for SEEK_*, errno
-import os, errno
-
-class LogSource (object) :
- """
- A collection of IRC logs for a specific target in some format. Provides the possibility to read specific events
- """
-
- def get_latest (self, count) :
- """
- Yield the latest events, up to `count` of them.
- """
-
- abstract
-
-class LogFile (LogSource) :
- """
- A file containing LogEvents
- """
-
- def __init__ (self, path, charset='utf-8', sep='\n') :
- """
- Open the file at the given path, which contains data of the given codec, as lines separated by the given separator
- """
-
- # store
- self.path = path
- self.charset = charset
- self.sep = sep
-
- # open
- self.file = codecs.open(path, 'r', charset)
-
- def __iter__ (self) :
- """
- Yields a series of lines, as read from the top of the file
- """
-
- # seek to beginning
- self.file.seek(0)
-
- # iterate over lines
- return iter(self.file)
-
- def get_latest (self, count) :
- """
- Returns up to <count> lines from the end of the file, or less, if the file doesn't contain that many lines
- """
-
- # the list of lines
- lines = []
-
- # seek to end of file
- self.file.seek(0, os.SEEK_END)
-
- # read offset
- # XXX; why -2 ?
- size = offset = self.file.tell() - 2
-
- # use this blocksize
- BLOCKSIZE = 1024
-
- # trailing data
- buf = ''
-
- # read a block at a time, backwards
- while count > 0 and offset >= 0:
- # update offset back one block
- offset -= BLOCKSIZE
-
- # normalize to zero
- if offset < 0 :
- offset = 0
-
- # seek to offset
- self.file.seek(offset)
-
- # add the new block to our buffer
- read_buf = self.file.read(BLOCKSIZE)
-
- # XXX: trim off extra...
- if len(read_buf) > BLOCKSIZE :
- read_buf = read_buf[:BLOCKSIZE]
-
- # make sure we got the right amount of data
- assert len(read_buf) == BLOCKSIZE, "read(%d) @ %d/%d -> %d" % (BLOCKSIZE, offset, size, len(read_buf))
-
- # add in our previous buf
- buf = read_buf + buf
-
- # split out lines
- buf_lines = buf.split(self.sep)
-
- # keep the first one as our buffer, as it's incomplete
- buf = buf_lines[0]
-
- # add up to count lines to our lines buffer
- lines = buf_lines[-min(count, len(buf_lines) - 1):] + lines
-
- # update count
- count -= (len(buf_lines) - 1)
-
- # return the line list
- return lines
-
-class LogDirectory (LogSource) :
- """
- A directory containing a series of timestamped LogFiles
- """
-
- def __init__ (self, path, tz, charset='utf-8', filename_fmt='%Y-%m-%d') :
- """
- Load the logfiles at the given path.
-
- The files contain data in the given charset, and are named according the the date in the given timezone and
- date format.
- """
-
- # store
- self.path = path
- self.tz = tz
- self.charset = charset
- self.filename_fmt = filename_fmt
-
- def _get_logfile_datetime (self, dt) :
- """
- Get the logfile corresponding to the given datetime
- """
-
- # convert to target timezone
- dtz = dt.astimezone(self.tz)
-
- # convert to date and use that
- return self._get_logfile_date(dtz.date())
-
- def _get_logfile_date (self, d) :
- """
- Get the logfile corresponding to the given naive date in our timezone
- """
-
- # format filename
- filename = d.strftime(self.filename_fmt)
-
- # build path
- path = os.path.join(self.path, filename)
-
- # return the LogFile
- return LogFile(path, self.charset)
-
- def _iter_backwards (self, dt=None) :
- """
- Yields an infinite series of naive date objects in our timezone, iterating backwards in time starting at the
- given *datetime*, or the the current date, if none given
- """
-
- # default to now
- if not dt :
- dt = datetime.now(pytz.utc)
-
- # convert to target timezone
- dtz = dt.astimezone(self.tz)
-
- # our timedelta
- ONE_DAY = timedelta(1)
-
- # iterate unto infinity
- while True :
- # yield
- yield dtz.date()
-
- # one day sdrawkcab
- dtz -= ONE_DAY
-
- def get_latest (self, count) :
- """
- Uses _iter_backwards + _get_logfile_date to read the yield the given lines from as many logfiles as needed
- """
-
- # iterate backwards from now
- day_iter = self._iter_backwards()
-
- # number of files read
- files = 0
-
- # only read up to 100 files or so
- MAX_FILES = 100
-
- # loop until done
- while count > 0 :
- logfile = None
-
- try :
- # get next logfile
- files += 1
-
- # open
- logfile = self._get_logfile_date(day_iter.next())
-
- except IOError, e :
- # skip nonexistant days if we haven't found any logs yet
- if e.errno != errno.ENOENT :
- raise
-
- if files > MAX_FILES :
- raise Exception("No recent logfiles found")
-
- else :
- # skip to next day
- continue
-
- # yield lines
- for line in logfile.get_latest(count) :
- # yield while we still need to, otherwise, stop
- if count > 0 :
- # decrement
- count -= 1
-
- yield line
-
- else :
- break
-
-