# TODO: implement channel_view count; the query code, CSS, and layout all need cleanup
"""
A source of IRC log files
"""
import codecs
from datetime import date, datetime, timedelta
import pytz
# for SEEK_*, errno
import os, errno
class LogSource (object) :
    """
    A collection of IRC logs for a specific target in some format. Provides the possibility to read specific events.

    This is an abstract base; concrete sources (LogFile, LogDirectory) must override get_latest.
    """
    def get_latest (self, count) :
        """
        Yield the latest events, up to `count` of them.

        Subclasses must override this method.
        """
        # the original body was the bare name `abstract`, which only raised a
        # NameError when called; NotImplementedError states the intent explicitly
        raise NotImplementedError()
class LogFile (LogSource) :
"""
A file containing LogEvents
"""
def __init__ (self, path, charset='utf-8', sep='\n') :
"""
Open the file at the given path, which contains data of the given codec, as lines separated by the given separator
"""
# store
self.path = path
self.charset = charset
self.sep = sep
# open
self.file = codecs.open(path, 'r', charset)
def __iter__ (self) :
"""
Yields a series of lines, as read from the top of the file
"""
# seek to beginning
self.file.seek(0)
# iterate over lines
return iter(self.file)
def get_latest (self, count) :
"""
Returns up to <count> lines from the end of the file, or less, if the file doesn't contain that many lines
"""
# the list of lines
lines = []
# seek to end of file
self.file.seek(0, os.SEEK_END)
# read offset
# XXX; why -2 ?
size = offset = self.file.tell() - 2
# use this blocksize
BLOCKSIZE = 1024
# trailing data
buf = ''
# read a block at a time, backwards
while count > 0 and offset >= 0:
# update offset back one block
offset -= BLOCKSIZE
# normalize to zero
if offset < 0 :
offset = 0
# seek to offset
self.file.seek(offset)
# add the new block to our buffer
read_buf = self.file.read(BLOCKSIZE)
# XXX: trim off extra...
if len(read_buf) > BLOCKSIZE :
read_buf = read_buf[:BLOCKSIZE]
# make sure we got the right amount of data
assert len(read_buf) == BLOCKSIZE, "read(%d) @ %d/%d -> %d" % (BLOCKSIZE, offset, size, len(read_buf))
# add in our previous buf
buf = read_buf + buf
# split out lines
buf_lines = buf.split(self.sep)
# keep the first one as our buffer, as it's incomplete
buf = buf_lines[0]
# add up to count lines to our lines buffer
lines = buf_lines[-min(count, len(buf_lines) - 1):] + lines
# update count
count -= (len(buf_lines) - 1)
# return the line list
return lines
class LogDirectory (LogSource) :
    """
    A directory containing a series of timestamped LogFiles, one file per day, named after the date.

    NOTE(review): this class uses Python-2-only syntax (`except IOError, e`, `day_iter.next()`).
    """
    def __init__ (self, path, tz, charset='utf-8', filename_fmt='%Y-%m-%d') :
        """
        Load the logfiles at the given path.
        The files contain data in the given charset, and are named according to the date in the given timezone and
        date format.

        `tz` is assumed to be a tzinfo instance (e.g. from pytz), used for datetime conversion and file
        naming -- TODO confirm against callers.
        """
        # store parameters; no file is opened until a logfile is requested
        self.path = path
        self.tz = tz
        self.charset = charset
        self.filename_fmt = filename_fmt
    def _get_logfile_datetime (self, dt) :
        """
        Get the logfile corresponding to the given aware datetime.
        """
        # convert to target timezone
        dtz = dt.astimezone(self.tz)
        # convert to date and use that
        return self._get_logfile_date(dtz.date())
    def _get_logfile_date (self, d) :
        """
        Get the logfile corresponding to the given naive date in our timezone.

        A missing file surfaces as the error raised by codecs.open (IOError/ENOENT on Python 2),
        which get_latest relies on to skip missing days.
        """
        # format filename from the date
        filename = d.strftime(self.filename_fmt)
        # build the full path inside our directory
        path = os.path.join(self.path, filename)
        # return the LogFile, decoding with our charset
        return LogFile(path, self.charset)
    def _iter_backwards (self, dt=None) :
        """
        Yields an infinite series of naive date objects in our timezone, iterating backwards in time starting at
        the given *datetime*, or the current date, if none given.
        """
        # default to now (aware, in UTC)
        if not dt :
            dt = datetime.now(pytz.utc)
        # convert to target timezone
        dtz = dt.astimezone(self.tz)
        # our timedelta: one day
        ONE_DAY = timedelta(1)
        # iterate unto infinity
        while True :
            # yield the date portion only
            yield dtz.date()
            # step one day backwards
            # NOTE(review): plain timedelta arithmetic on an aware datetime ignores DST transitions --
            # presumably acceptable at date granularity; confirm if exact local dates matter
            dtz -= ONE_DAY
    def get_latest (self, count) :
        """
        Uses _iter_backwards + _get_logfile_date to yield up to `count` lines from as many logfiles as needed.

        Generator: yields lines newest-day-first; within each day, lines come in file order from
        LogFile.get_latest. Days whose logfile is missing (ENOENT) are skipped.
        """
        # iterate backwards from now
        day_iter = self._iter_backwards()
        # number of logfile-open attempts (successful opens are counted too)
        files = 0
        # only read up to 100 files or so
        MAX_FILES = 100
        # loop until we've yielded `count` lines
        while count > 0 :
            logfile = None
            try :
                # count this attempt
                files += 1
                # open the logfile for the next day back
                logfile = self._get_logfile_date(day_iter.next())
            except IOError, e :
                # skip nonexistant days; anything other than ENOENT is a real error
                if e.errno != errno.ENOENT :
                    raise
                # NOTE(review): the MAX_FILES bound is only enforced on this missing-day path;
                # a long run of existing-but-empty logfiles is not bounded by it
                if files > MAX_FILES :
                    raise Exception("No recent logfiles found")
                else :
                    # skip to the previous day
                    continue
            # yield this day's lines, consuming `count` as we go
            for line in logfile.get_latest(count) :
                # yield while we still need to, otherwise, stop
                if count > 0 :
                    # one fewer line still needed
                    count -= 1
                    yield line
                else :
                    break