# HG changeset patch # User Tero Marttila # Date 1234238217 -7200 # Node ID a34e9f56ddda67ce83d9c87f380eb6f0282821b9 # Parent afd3120ec71ece351258b335f6dc533635f31be2 improve parser resilience, improve get_month_days, add 'Channel' item to general menu diff -r afd3120ec71e -r a34e9f56ddda handlers.py --- a/handlers.py Tue Feb 10 04:27:22 2009 +0200 +++ b/handlers.py Tue Feb 10 05:56:57 2009 +0200 @@ -157,7 +157,7 @@ )) # get set of days available - days = channel.source.get_month_days(target) + days = set(channel.source.get_month_days(target)) # display calendar return templates.render_to_response("channel_calendar", diff -r afd3120ec71e -r a34e9f56ddda log_parser.py --- a/log_parser.py Tue Feb 10 04:27:22 2009 +0200 +++ b/log_parser.py Tue Feb 10 05:56:57 2009 +0200 @@ -44,33 +44,54 @@ """ for offset, line in enumerate(lines) : - # status lines - if line.startswith('---') : - # XXX: handle these - continue - - # normal lines + # offset? + if starting_offset : + offset = starting_offset + offset + else : - # XXX: only parse timestamps for now - timestamp, data = line.split(' ', 1) - - # parse timestamp into naive datetime - dt = datetime.datetime.strptime(timestamp, self.timestamp_fmt) - - # override date? - if date : - dt = dt.replace(year=date.year, month=date.month, day=date.day) - - # now localize with timezone - dtz = self.tz.localize(dt) + offset = None + + # try and parse + try : + line = self.parse_line(line, date, offset) - # offset? - if starting_offset : - offset = starting_offset + offset + except Exception, e : + raise Exception("Parsing line failed: %r@%d: %s" % (line, offset, e)) + + else : + # yield unless None + if line : + yield line - else : - offset = None + def parse_line (self, line, date, offset=None) : + """ + Parse a single line, and return the resulting LogLine, or None, to ignore the line + """ + + # empty line + if not line : + return - # yield raw events - yield log_line.LogLine(offset, LogTypes.RAW, dtz, None, data) + # status lines + elif line.startswith('---') : + # XXX: handle these + return + + # normal lines + else : + # XXX: only parse timestamps for now + timestamp, data = line.split(' ', 1) + + # parse timestamp into naive datetime + dt = datetime.datetime.strptime(timestamp, self.timestamp_fmt) + + # override date? + if date : + dt = dt.replace(year=date.year, month=date.month, day=date.day) + + # now localize with timezone + dtz = self.tz.localize(dt) + # build raw event + return log_line.LogLine(offset, LogTypes.RAW, dtz, None, data) + diff -r afd3120ec71e -r a34e9f56ddda log_source.py --- a/log_source.py Tue Feb 10 04:27:22 2009 +0200 +++ b/log_source.py Tue Feb 10 05:56:57 2009 +0200 @@ -144,7 +144,7 @@ def get_month_days (self, dt) : """ - Get a set of dates, telling which days in the given month (as a datetime) have logs available + Return a sequence of dates, telling which days in the given month (as a datetime) have logs available """ abstract @@ -294,7 +294,7 @@ # yield the rest a line at a time in reverse order... this looks weird, but that's how slicing works :) # XXX: use something like islice, this has to build a slice object for line in lines[:0:-1] : - yield line.decode(self.charset) + yield self.decoder.decode(line) def read_latest (self, count) : """ @@ -516,28 +516,38 @@ f_begin.read_from(dtz_begin), f_end.read_until(dtz_end) if f_end else [] ) + + def _iter_month_days (self, month) : + """ + Iterates over the days of a month as dt objects with time=0 + """ + + # there's at most 31 days in a month... + for day in xrange(1, 32) : + try : + # try and build the datetime + dt = datetime.datetime(month.year, month.month, day) + + except : + # stop + return + + else : + # fix timezones + yield + yield month.tzinfo.localize(dt) def get_month_days (self, month) : """ Returns a set of dates for which logfiles are available in the given datetime's month """ - # the set of days - days = set() - - # iterate over month's days using Calendar - for date in calendar.Calendar().itermonthdates(month.year, month.month) : - # convert date to target datetime - dtz = month.tzinfo.localize(datetime.datetime.combine(date, datetime.time(0))).astimezone(self.tz) - + # iterate over month's days + for dt in self._iter_month_days(month) : # date in our target timezone - log_date = dtz.date() + log_date = dt.astimezone(self.tz).date() # test for it if self._get_logfile_date(log_date, load=False) : - # add to set - days.add(date) + # valid + yield dt.date() - # return set - return days - diff -r afd3120ec71e -r a34e9f56ddda scripts/search-index.py --- a/scripts/search-index.py Tue Feb 10 04:27:22 2009 +0200 +++ b/scripts/search-index.py Tue Feb 10 05:56:57 2009 +0200 @@ -99,7 +99,7 @@ raise # get the set of days - days = channel.source.get_month_days(month) + days = list(channel.source.get_month_days(month)) print "Loading %d days of logs:" % (len(days)) diff -r afd3120ec71e -r a34e9f56ddda templates/layout.tmpl --- a/templates/layout.tmpl Tue Feb 10 04:27:22 2009 +0200 +++ b/templates/layout.tmpl Tue Feb 10 05:56:57 2009 +0200 @@ -4,6 +4,16 @@