improve parser resilience, improve get_month_days, add 'Channel' item to general menu
author Tero Marttila <terom@fixme.fi>
Tue, 10 Feb 2009 05:56:57 +0200
changeset 83 a34e9f56ddda
parent 82 afd3120ec71e
child 84 3c78c9c080e9
improve parser resilience, improve get_month_days, add 'Channel' item to general menu
handlers.py
log_parser.py
log_source.py
scripts/search-index.py
templates/layout.tmpl
--- a/handlers.py	Tue Feb 10 04:27:22 2009 +0200
+++ b/handlers.py	Tue Feb 10 05:56:57 2009 +0200
@@ -157,7 +157,7 @@
     ))
 
     # get set of days available
-    days = channel.source.get_month_days(target)
+    days = set(channel.source.get_month_days(target))
 
     # display calendar
     return templates.render_to_response("channel_calendar",
--- a/log_parser.py	Tue Feb 10 04:27:22 2009 +0200
+++ b/log_parser.py	Tue Feb 10 05:56:57 2009 +0200
@@ -44,33 +44,54 @@
         """
 
         for offset, line in enumerate(lines) :
-            # status lines
-            if line.startswith('---') :
-                # XXX: handle these
-                continue
-            
-            # normal lines
+            # offset?
+            if starting_offset :
+                offset = starting_offset + offset
+
             else :
-                # XXX: only parse timestamps for now
-                timestamp, data = line.split(' ', 1)
-                
-                # parse timestamp into naive datetime
-                dt = datetime.datetime.strptime(timestamp, self.timestamp_fmt)
-                
-                # override date?
-                if date :
-                    dt = dt.replace(year=date.year, month=date.month, day=date.day)
-                
-                # now localize with timezone
-                dtz = self.tz.localize(dt)
+                offset = None
+            
+            # try and parse
+            try :
+                line = self.parse_line(line, date, offset)
 
-                # offset?
-                if starting_offset :
-                    offset = starting_offset + offset
+            except Exception, e :
+                raise Exception("Parsing line failed: %r@%d: %s" % (line, offset, e))
+            
+            else :
+                # yield unless None
+                if line :
+                    yield line
 
-                else :
-                    offset = None
+    def parse_line (self, line, date, offset=None) :
+        """
+            Parse a single line, and return the resulting LogLine, or None, to ignore the line
+        """
+        
+        # empty line
+        if not line :
+            return
 
-                # yield raw events
-                yield log_line.LogLine(offset, LogTypes.RAW, dtz, None, data)
+        # status lines
+        elif line.startswith('---') :
+            # XXX: handle these
+            return
+        
+        # normal lines
+        else :
+            # XXX: only parse timestamps for now
+            timestamp, data = line.split(' ', 1)
+            
+            # parse timestamp into naive datetime
+            dt = datetime.datetime.strptime(timestamp, self.timestamp_fmt)
+            
+            # override date?
+            if date :
+                dt = dt.replace(year=date.year, month=date.month, day=date.day)
+            
+            # now localize with timezone
+            dtz = self.tz.localize(dt)
 
+            # build raw event
+            return log_line.LogLine(offset, LogTypes.RAW, dtz, None, data)
+
--- a/log_source.py	Tue Feb 10 04:27:22 2009 +0200
+++ b/log_source.py	Tue Feb 10 05:56:57 2009 +0200
@@ -144,7 +144,7 @@
 
     def get_month_days (self, dt) :
         """
-            Get a set of dates, telling which days in the given month (as a datetime) have logs available
+            Return a sequence of dates, telling which days in the given month (as a datetime) have logs available
         """
 
         abstract
@@ -294,7 +294,7 @@
             # yield the rest a line at a time in reverse order... this looks weird, but that's how slicing works :)
             # XXX: use something like islice, this has to build a slice object
             for line in lines[:0:-1] :
-                yield line.decode(self.charset)
+                yield self.decoder.decode(line)
 
     def read_latest (self, count) :
         """
@@ -516,28 +516,38 @@
                 f_begin.read_from(dtz_begin), 
                 f_end.read_until(dtz_end) if f_end else []
             )
+    
+    def _iter_month_days (self, month) :
+        """
+            Iterates over the days of a month as dt objects with time=0
+        """
+        
+        # there's at most 31 days in a month...
+        for day in xrange(1, 32) :
+            try :
+                # try and build the datetime
+                dt = datetime.datetime(month.year, month.month, day)
+
+            except :
+                # stop
+                return
+            
+            else :
+                # fix timezones + yield
+                yield month.tzinfo.localize(dt)
 
     def get_month_days (self, month) :
         """
             Returns a set of dates for which logfiles are available in the given datetime's month
         """
         
-        # the set of days
-        days = set()
-        
-        # iterate over month's days using Calendar
-        for date in calendar.Calendar().itermonthdates(month.year, month.month) :
-            # convert date to target datetime
-            dtz = month.tzinfo.localize(datetime.datetime.combine(date, datetime.time(0))).astimezone(self.tz)
-
+        # iterate over month's days
+        for dt in self._iter_month_days(month) :
             # date in our target timezone
-            log_date = dtz.date()
+            log_date = dt.astimezone(self.tz).date()
             
             # test for it
             if self._get_logfile_date(log_date, load=False) :
-                # add to set
-                days.add(date)
+                # valid
+                yield dt.date()
 
-        # return set
-        return days
-
--- a/scripts/search-index.py	Tue Feb 10 04:27:22 2009 +0200
+++ b/scripts/search-index.py	Tue Feb 10 05:56:57 2009 +0200
@@ -99,7 +99,7 @@
                 raise
         
         # get the set of days
-        days = channel.source.get_month_days(month)
+        days = list(channel.source.get_month_days(month))
 
         print "Loading %d days of logs:" % (len(days))
 
--- a/templates/layout.tmpl	Tue Feb 10 04:27:22 2009 +0200
+++ b/templates/layout.tmpl	Tue Feb 10 05:56:57 2009 +0200
@@ -4,6 +4,16 @@
 <ul>
     <li><a href="${urls.index.build(req)}">Home</a></li>
     <li><a href="${urls.preferences.build(req)}">Preferences</a></li>
+    
+    <li>
+        <span>Channel:</span>
+    </li><li class="join-left">
+        <form action="${urls.channel_select.build(req)}" method="GET">
+            <select name="channel">
+            ${h.select_options(((ch.id, ch.title) for ch in channel_list), channel.id if channel else None)}
+            </select><input type="submit" value="Go &raquo;" />
+        </form>
+    </li>
 </ul>
 </%def>