# HG changeset patch # User Tero Marttila # Date 1234131853 -7200 # Node ID f13cf27a360b39bfc966cf41804e6642a49230de # Parent aaa62c8e5bd5c80cdac7558009c59d0e80d232a4 implement more LogSource features (logs for date, cleanup last_logs), implement irssi parser, formatter, other misc. stuff diff -r aaa62c8e5bd5 -r f13cf27a360b channels.py --- a/channels.py Sun Feb 08 04:59:22 2009 +0200 +++ b/channels.py Mon Feb 09 00:24:13 2009 +0200 @@ -9,6 +9,7 @@ from log_channel import LogChannel from log_source import LogDirectory +from log_parser import IrssiParser relpath = lambda path : os.path.join(os.path.dirname(__file__), path) @@ -16,14 +17,20 @@ """ The list of channels, and related methods """ + + # timezone to use + TIMEZONE = pytz.timezone('Europe/Helsinki') + + # the parser that we use + PARSER = IrssiParser(TIMEZONE, "%H:%M:%S") # the statically defined channel list CHANNELS = { 'tycoon': LogChannel('tycoon', "OFTC", "#tycoon", - LogDirectory(relpath('logs/tycoon'), pytz.timezone('Europe/Helsinki')) + LogDirectory(relpath('logs/tycoon'), TIMEZONE, PARSER) ), 'openttd': LogChannel('openttd', "OFTC", "#openttd", - LogDirectory(relpath('logs/openttd'), pytz.timezone('Europe/Helsinki')) + LogDirectory(relpath('logs/openttd'), TIMEZONE, PARSER) ), } diff -r aaa62c8e5bd5 -r f13cf27a360b handlers.py --- a/handlers.py Sun Feb 08 04:59:22 2009 +0200 +++ b/handlers.py Mon Feb 09 00:24:13 2009 +0200 @@ -4,10 +4,11 @@ from qmsk.web import http, template -import urls, channels +import urls, channels, helpers # load templates from here templates = template.TemplateLoader("templates", + h = helpers, urls = urls, channel_list = channels.channel_list, ) @@ -28,37 +29,61 @@ return http.Redirect(urls.channel_view.build(request, channel=channel.id)) -def channel_view (request, channel, count) : +def channel_view (request, channel, count, formatter) : """ The main channel view page, display the most important info, and all requisite links """ - - if count == 'all' : - xxx + + # get 
latest events + lines = channel.source.get_latest(count) - else : - count = int(count) + # format + lines = formatter.format_html(lines) - return templates.render_to_response("channel", + return templates.render_to_response("channel_view", req = request, channel = channel, count = count, - lines = channel.source.get_latest(count), + formatter = formatter, + lines = lines, ) - pass - def channel_last (request, channel, count, format) : """ Display the last x lines of channel messages in various formats """ if format == 'txt' : - return http.Response('\n'.join(channel.source.get_latest(count)), 'text/plain') - + # XXX: formatting +# return http.Response('\n'.join(str(channel.source.get_latest(count))), 'text/plain') + pass + else : raise http.ResponseError("Unknown filetype %r" % format) +def channel_date (request, channel, date, formatter) : + """ + Display all log data for the given date + """ + + # XXX: fix date timezone + import pytz + date = date.replace(tzinfo=pytz.utc) + + # get latest events + lines = channel.source.get_date(date) + + # format + lines = formatter.format_html(lines) + + return templates.render_to_response("channel_date", + req = request, + channel = channel, + formatter = formatter, + date = date, + lines = lines, + ) + def channel_search (request, channel, q) : """ Display the search form for the channel for GET, or do the search for POST diff -r aaa62c8e5bd5 -r f13cf27a360b helpers.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/helpers.py Mon Feb 09 00:24:13 2009 +0200 @@ -0,0 +1,22 @@ +""" + Some additional helpers +""" + +# "inherit" qmsk.web's helpers +from qmsk.web.helpers import * + +def tz_name (tz) : + """ + Returns a string describing the given timezone + """ + + return str(tz) + +def fmt_date (date) : + """ + Formats a date + """ + + # XXX: hardcoded + return date.strftime('%Y-%m-%d') + diff -r aaa62c8e5bd5 -r f13cf27a360b log_event.py --- a/log_event.py Sun Feb 08 04:59:22 2009 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 
+0000 @@ -1,21 +0,0 @@ -""" - An IRC logfile consists of a series of events, a.k.a. "lines" -""" - -class LogEvent (object) : - """ - An event on some specific channel - """ - - # the event ype - type = None - - # the UTC timestamp of the event - timestamp = None - - # the event source - source = None - - # associated data (message, etc) - data = None - diff -r aaa62c8e5bd5 -r f13cf27a360b log_formatter.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/log_formatter.py Mon Feb 09 00:24:13 2009 +0200 @@ -0,0 +1,106 @@ +""" + Format LogLines into some other representation +""" + +from qmsk.web import helpers + +from log_line import LogTypes + +class LogFormatter (object) : + """ + Provides a method to format series of LogLines into various output formats, with varying themes + """ + + def __init__ (self, tz, timestamp_fmt="%H:%M:%S") : + """ + Initialize to format timestamps with the given timezone and timestamp + """ + + self.tz = tz + self.timestamp_fmt = timestamp_fmt + + def _format_line_text (self, line, template_dict) : + """ + Format the given line as text, using the given { type: string template } dict + """ + + # look up the template + template = template_dict[line.type] + + # build timestamp + timestamp = line.timestamp.astimezone(self.tz).strftime(self.timestamp_fmt) + + # format with dict + return template % dict( + timestamp = timestamp, + source = line.source, + data = line.data, + ) + + def format_txt (self, lines) : + """ + Format as plaintext + """ + + abstract + + def format_html (self, lines) : + """ + Format as HTML + """ + + abstract + +class IrssiTextFormatter (LogFormatter) : + """ + Implements format_txt for irssi-style output + """ + + # format definitions by type + __FMT = { + LogTypes.RAW : "%(timestamp)s %(data)s", + } + + def format_txt (self, lines) : + # ...handle each line + for line in lines : + # using __TYPES + yield self._format_line_text(line, self.__FMT) + +class IrssiFormatter (IrssiTextFormatter) : + """ + Implements plain 
black-and-white irssi-style formatting + """ + + def format_html (self, lines) : + """ + Just uses format_txt, but wraps in <pre>
+        """
+        
+        # open pre
+        yield "<pre>"
+        
+        # format using IrssiTextFormatter
+        for line in self.format_txt(lines) :
+            # escape HTML
+            yield helpers.escape(line)
+
+        # close pre
+        yield "</pre>" + +# define formatters by name +FORMATTERS = { + 'irssi': IrssiFormatter, +} + +def by_name (name) : + """ + Lookup and return a formatter by name + + XXX: uses default timezone/timefmt + """ + + import pytz + + return FORMATTERS[name](pytz.utc) + diff -r aaa62c8e5bd5 -r f13cf27a360b log_line.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/log_line.py Mon Feb 09 00:24:13 2009 +0200 @@ -0,0 +1,39 @@ +""" + An IRC logfile consists of a series of lines/events +""" + +class LogTypes : + """ + Definitions of the various LogLines types + """ + + # unknown type, may or may not have a timestamp, no source, only data + RAW = 0x01 + +class LogLine (object) : + """ + An event on some specific channel + """ + + # the event type, as defiend in LogTypes + type = None + + # the UTC timestamp of the event + timestamp = None + + # the event source + source = None + + # associated data (message, etc) + data = None + + def __init__ (self, type, timestamp, source, data) : + """ + Initialize with given values + """ + + self.type = type + self.timestamp = timestamp + self.source = source + self.data = data + diff -r aaa62c8e5bd5 -r f13cf27a360b log_parser.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/log_parser.py Mon Feb 09 00:24:13 2009 +0200 @@ -0,0 +1,67 @@ +""" + Parse log data into log_events +""" + +import datetime + +import log_line +from log_line import LogTypes + +class LogParser (object) : + """ + Abstract interface + """ + + def __init__ (self, tz, timestamp_fmt="%H:%M:%S") : + """ + Setup the parser to use the given format for line timestamps, which are of the given timezone + """ + + self.tz = tz + self.timestamp_fmt = timestamp_fmt + + def parse_lines (self, lines, date=None) : + """ + Parse the given (iterable) lines of unicode text into a LogEvent, no trailing newline. + + Giving date lets the parser build full timestamps, otherwise, unless line timestamps have full date + information, event timestamps will have a date component of 1900/1/1.
+ """ + + abstract + + +class IrssiParser (LogParser) : + """ + A parser for irssi logfiles + """ + + def parse_lines (self, lines, date=None) : + """ + Parse the given lines, yielding LogEvents. + """ + + for line in lines : + # status lines + if line.startswith('---') : + # XXX: handle these + continue + + # normal lines + else : + # XXX: only parse timestamps for now + timestamp, data = line.split(' ', 1) + + # parse timestamp into naive datetime + dt = datetime.datetime.strptime(timestamp, self.timestamp_fmt) + + # override date? + if date : + dt = dt.replace(year=date.year, month=date.month, day=date.day) + + # now localize with timezone + dtz = self.tz.localize(dt) + + # yield raw events + yield log_line.LogLine(LogTypes.RAW, dtz, None, data) + diff -r aaa62c8e5bd5 -r f13cf27a360b log_source.py --- a/log_source.py Sun Feb 08 04:59:22 2009 +0200 +++ b/log_source.py Mon Feb 09 00:24:13 2009 +0200 @@ -2,13 +2,10 @@ A source of IRC log files """ -import codecs -from datetime import date, datetime, timedelta +import datetime, itertools +import os, errno import pytz -# for SEEK_*, errno -import os, errno - class LogSource (object) : """ A collection of IRC logs for a specific target in some format. Provides the possibility to read specific events @@ -20,19 +17,30 @@ """ abstract + + def get_date (self, dt) : + """ + Get logs for the given date (as a datetime) + """ + + abstract class LogFile (LogSource) : """ A file containing LogEvents """ - def __init__ (self, path, charset='utf-8', sep='\n') : + def __init__ (self, path, parser, start_date=None, charset='utf-8', sep='\n') : """ - Open the file at the given path, which contains data of the given codec, as lines separated by the given separator + Open the file at the given path, which contains data with the given charset, as lines separated by the + given separator. Lines are parsed using the given parser, using the given date as an initial date, see + LogParser for more info. 
XXX: currently we assume start_date also for the end of the file """ # store self.path = path + self.parser = parser + self.start_date = start_date self.charset = charset self.sep = sep @@ -41,22 +49,68 @@ def __iter__ (self) : """ - Yields a series of lines, as read from the top of the file + Yields a series of unicode lines, as read from the top of the file """ # seek to beginning self.file.seek(0) - # iterate over lines - return iter(self.file) + # iterate over lines, decoding them as well + return (line.decode(self.charset) for line in self.file) - def get_latest (self, count) : + def read_full (self) : """ - Returns up to lines from the end of the file, or less, if the file doesn't contain that many lines + Reads all LogLines + """ + + # just use our __iter__ + return self.parser.parse_lines(self, self.start_date) + + def read_from (self, dt) : + """ + Reads all LogLines from the given naive timestamp onwards + """ + + # start reading at beginning + events = self.read_full() + + # skip unwanted events + for event in events : + if event.timestamp < dt : + continue + + else : + # include this line as well + yield event + break + + # yield the rest as-is + for event in events : + yield event + + def read_until (self, dt) : + """ + Reads all LogLines up until the given naive timestamp """ - # the list of lines - lines = [] + # start reading events at the beginning + events = self.read_full() + + # yield events until we hit the given timestamp + for event in events : + if event.timestamp <= dt : + yield event + + else : + break + + # ignore the rest + return + + def _read_blocks_reverse (self, blocksize=1024) : + """ + Yields blocks of file data in reverse order, starting at the end of the file + """ # seek to end of file self.file.seek(0, os.SEEK_END) @@ -64,20 +118,14 @@ # read offset # XXX: hack -1 to get rid of trailing newline size = offset = self.file.tell() - 1 - - # use this blocksize - BLOCKSIZE = 1024 - - # trailing data - buf = '' - - # read a block at a 
time, backwards - while len(lines) < count and offset > 0: + + # do not try to read past the beginning of the file + while offset > 0: # calc new offset + size - if offset > BLOCKSIZE : + if offset > blocksize : # full block - offset -= BLOCKSIZE - read_size = BLOCKSIZE + offset -= blocksize + read_size = blocksize else : # partial block @@ -88,47 +136,77 @@ self.file.seek(offset) # read the data we want - read_buf = self.file.read(read_size) - read_len = len(read_buf) + block = self.file.read(read_size) # sanity check - assert read_len == read_size + assert len(block) == read_size + # yield + yield block + + def _read_lines_reverse (self) : + """ + Yields decoded lines from the end of the file, in reverse order. + """ + + # partial lines + buf = '' + + # read from end of file, a block at a time + for block in self._read_blocks_reverse() : # add in our previous buf - buf = read_buf + buf + buf = block + buf - # split out lines - buf_lines = buf.split(self.sep) + # split up lines + lines = buf.split(self.sep) # keep the first one as our buffer, as it's incomplete - buf = buf_lines[0] + buf = lines[0] + + # yield the rest a line at a time in reverse order... this looks weird, but that's how slicing works :) + # XXX: use something like islice, this has to build a slice object + for line in lines[:0:-1] : + yield line.decode(self.charset) - # prepend up to count lines from the end to our lines buffer - lines = buf_lines[-min(count, len(buf_lines) - 1):] + lines + def get_latest (self, count) : + """ + Returns up to count events, from the end of the file, or less, if the file doesn't contain that many lines. + """ + + # the list of lines + lines = [] + + # start reading lines into lines + for line in self._read_lines_reverse() : + # append + lines.append(line) + + # done? 
+ if len(lines) >= count : + break - # decode - # XXX: better queue implementation, plz - lines = [line.decode(self.charset) for line in lines] - - # return the line list - return lines + # decode in reverse order, using our starting date.... + # XXX: use lines[::-1] or reversed? + # XXX: it may make more sense to parse in reverse order, using 'self.end_date' or something like that + return self.parser.parse_lines(reversed(lines), self.start_date) class LogDirectory (LogSource) : """ A directory containing a series of timestamped LogFiles """ - def __init__ (self, path, tz, charset='utf-8', filename_fmt='%Y-%m-%d') : + def __init__ (self, path, tz, parser, charset='utf-8', filename_fmt='%Y-%m-%d') : """ Load the logfiles at the given path. The files contain data in the given charset, and are named according the the date in the given timezone and - date format. + date format, and will be parsed using the given parser. """ # store self.path = path self.tz = tz + self.parser = parser self.charset = charset self.filename_fmt = filename_fmt @@ -155,9 +233,9 @@ path = os.path.join(self.path, filename) # return the LogFile - return LogFile(path, self.charset) + return LogFile(path, self.parser, d, self.charset) - def _iter_backwards (self, dt=None) : + def _iter_date_reverse (self, dt=None) : """ Yields an infinite series of naive date objects in our timezone, iterating backwards in time starting at the given *datetime*, or the the current date, if none given @@ -165,13 +243,13 @@ # default to now if not dt : - dt = datetime.now(pytz.utc) + dt = datetime.datetime.now(pytz.utc) # convert to target timezone dtz = dt.astimezone(self.tz) # our timedelta - ONE_DAY = timedelta(1) + ONE_DAY = datetime.timedelta(1) # iterate unto infinity while True : @@ -187,7 +265,7 @@ """ # iterate backwards from now - day_iter = self._iter_backwards() + day_iter = self._iter_date_reverse() # number of files read files = 0 @@ -195,7 +273,7 @@ # only read up to 100 files or so MAX_FILES = 100 - 
# read the lines into here + # read the events into here lines = [] # loop until done @@ -221,9 +299,37 @@ # skip to next day continue - # read the lines - lines = logfile.get_latest(count) + lines + # read the events + # XXX: use a queue + lines = list(logfile.get_latest(count)) + lines - # return the lines + # return the events return lines + def get_date (self, dt) : + """ + A 'day' is considered to be a 24-hour period from 00:00:00 23:59:59. If the timezone of the given datetime + differs from our native datetime, this may involve lines from more than one logfile. + """ + + # begin/end of 24h period, in target timezone + dtz_begin = dt.replace(hour=0, minute=0, second=0).astimezone(self.tz) + dtz_end = dt.replace(hour=23, minute=59, second=59, microsecond=999999).astimezone(self.tz) + + # as dates + d_begin = dtz_begin.date() + d_end = dtz_end.date() + + # if they're the same, just pull the full log for that date + if d_begin == d_end : + return self._get_logfile_date(d_begin).read_full() + + # otherwise, we need to pull two partial logs + else : + # open both of them + f_begin = self._get_logfile_date(d_begin) + f_end = self._get_logfile_date(d_end) + + # chain together the two sources + return itertools.chain(f_begin.read_from(dtz_begin), f_end.read_until(dtz_end)) + diff -r aaa62c8e5bd5 -r f13cf27a360b static/irclogs.css --- a/static/irclogs.css Sun Feb 08 04:59:22 2009 +0200 +++ b/static/irclogs.css Mon Feb 09 00:24:13 2009 +0200 @@ -51,7 +51,7 @@ height: 1.5em; } -#menu li:first { +#menu li:first-child { border-left: none; } @@ -93,10 +93,6 @@ background-color: #d0d0d0; } -#menu form option { - background-color: auto; -} - /* * Content */ @@ -108,8 +104,7 @@ * Footer */ div#footer { - width: 100%; - padding: 10px 0px 10px; + padding: 10px; border-top: 1px dashed #a5a5a5; diff -r aaa62c8e5bd5 -r f13cf27a360b templates/channel.tmpl --- a/templates/channel.tmpl Sun Feb 08 04:59:22 2009 +0200 +++ b/templates/channel.tmpl Mon Feb 09 00:24:13 2009 +0200 @@ 
-45,11 +45,8 @@ -

${channel.title} » Last ${count} lines

+${next.body()} -
-% for line in lines :
-${line | h}
-% endfor
-
- +<%def name="footer_right()"> + All times are in ${h.tz_name(formatter.tz)} + diff -r aaa62c8e5bd5 -r f13cf27a360b templates/channel_date.tmpl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/templates/channel_date.tmpl Mon Feb 09 00:24:13 2009 +0200 @@ -0,0 +1,9 @@ +<%inherit file="channel.tmpl" /> + +

${channel.title} » Logs for ${h.fmt_date(date)}

+ +% for line in lines : +${line}\ +% endfor + + diff -r aaa62c8e5bd5 -r f13cf27a360b templates/channel_view.tmpl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/templates/channel_view.tmpl Mon Feb 09 00:24:13 2009 +0200 @@ -0,0 +1,9 @@ +<%inherit file="channel.tmpl" /> + +

${channel.title} » Last ${count} lines

+ +% for line in lines : +${line} +% endfor + + diff -r aaa62c8e5bd5 -r f13cf27a360b templates/layout.tmpl --- a/templates/layout.tmpl Sun Feb 08 04:59:22 2009 +0200 +++ b/templates/layout.tmpl Mon Feb 09 00:24:13 2009 +0200 @@ -4,6 +4,10 @@ +<%def name="footer_right()"> + + + irclogs.qmsk.net${(' :: ' + channel.title) if channel else ''} @@ -11,7 +15,7 @@
@@ -19,11 +23,16 @@