diff -r 8103d18907a0 -r b65a95eb9f6b log_source.py --- a/log_source.py Mon Feb 09 03:05:43 2009 +0200 +++ b/log_source.py Mon Feb 09 04:39:24 2009 +0200 @@ -2,7 +2,7 @@ A source of IRC log files """ -import datetime, itertools +import datetime, calendar, itertools import os, errno import pytz @@ -24,10 +24,19 @@ """ abstract + + def get_month_days (self, dt) : + """ + Get a set of dates, telling which days in the given month (as a datetime) have logs available + """ -class LogFile (LogSource) : + abstract + +class LogFile (object) : """ A file containing LogEvents + + XXX: modify to implement LogSource? """ def __init__ (self, path, parser, start_date=None, charset='utf-8', sep='\n') : @@ -168,7 +177,7 @@ for line in lines[:0:-1] : yield line.decode(self.charset) - def get_latest (self, count) : + def read_latest (self, count) : """ Returns up to count events, from the end of the file, or less, if the file doesn't contain that many lines. """ @@ -221,9 +230,12 @@ # convert to date and use that return self._get_logfile_date(dtz.date()) - def _get_logfile_date (self, d) : + def _get_logfile_date (self, d, load=True) : """ - Get the logfile corresponding to the given naive date in our timezone + Get the logfile corresponding to the given naive date in our timezone. If load is False, only test for the + presence of the logfile, do not actually open it. + + Returns None if the logfile does not exist. """ # format filename @@ -231,9 +243,24 @@ # build path path = os.path.join(self.path, filename) + + try : + if load : + # open+return the LogFile + return LogFile(path, self.parser, d, self.charset) + + else : + # test + return os.path.exists(path) - # return the LogFile - return LogFile(path, self.parser, d, self.charset) + # XXX: move to LogFile + except IOError, e : + # return None for missing files + if e.errno == errno.ENOENT : + return None + + else : + raise def _iter_date_reverse (self, dt=None) : """ @@ -280,18 +307,13 @@ while len(lines) < count : logfile = None - try : - # get next logfile - files += 1 - - # open - logfile = self._get_logfile_date(day_iter.next()) + # get next logfile + files += 1 - except IOError, e : - # skip nonexistant days if we haven't found any logs yet - if e.errno != errno.ENOENT : - raise - + # open + logfile = self._get_logfile_date(day_iter.next()) + + if not logfile : if files > MAX_FILES : raise Exception("No recent logfiles found") @@ -301,7 +323,7 @@ # read the events # XXX: use a queue - lines = list(logfile.get_latest(count)) + lines + lines = list(logfile.read_latest(count)) + lines # return the events return lines @@ -333,3 +355,27 @@ # chain together the two sources return itertools.chain(f_begin.read_from(dtz_begin), f_end.read_until(dtz_end)) + def get_month_days (self, month) : + """ + Returns a set of dates for which logfiles are available in the given datetime's month + """ + + # the set of days + days = set() + + # iterate over month's days using Calendar + for date in calendar.Calendar().itermonthdates(month.year, month.month) : + # convert date to target datetime + dtz = month.tzinfo.localize(datetime.datetime.combine(date, datetime.time(0))).astimezone(self.tz) + + # date in our target timezone + log_date = dtz.date() + + # test for it + if self._get_logfile_date(log_date, load=False) : + # add to set + days.add(date) + + # return set + return days +