implement more LogSource features (logs for date, cleanup last_logs), implement irssi parser, formatter, other misc. stuff
--- a/channels.py Sun Feb 08 04:59:22 2009 +0200
+++ b/channels.py Mon Feb 09 00:24:13 2009 +0200
@@ -9,6 +9,7 @@
from log_channel import LogChannel
from log_source import LogDirectory
+from log_parser import IrssiParser
relpath = lambda path : os.path.join(os.path.dirname(__file__), path)
@@ -16,14 +17,20 @@
"""
The list of channels, and related methods
"""
+
+ # timezone to use
+ TIMEZONE = pytz.timezone('Europe/Helsinki')
+
+ # the parser that we use
+ PARSER = IrssiParser(TIMEZONE, "%H:%M:%S")
# the statically defined channel list
CHANNELS = {
'tycoon': LogChannel('tycoon', "OFTC", "#tycoon",
- LogDirectory(relpath('logs/tycoon'), pytz.timezone('Europe/Helsinki'))
+ LogDirectory(relpath('logs/tycoon'), TIMEZONE, PARSER)
),
'openttd': LogChannel('openttd', "OFTC", "#openttd",
- LogDirectory(relpath('logs/openttd'), pytz.timezone('Europe/Helsinki'))
+ LogDirectory(relpath('logs/openttd'), TIMEZONE, PARSER)
),
}
--- a/handlers.py Sun Feb 08 04:59:22 2009 +0200
+++ b/handlers.py Mon Feb 09 00:24:13 2009 +0200
@@ -4,10 +4,11 @@
from qmsk.web import http, template
-import urls, channels
+import urls, channels, helpers
# load templates from here
templates = template.TemplateLoader("templates",
+ h = helpers,
urls = urls,
channel_list = channels.channel_list,
)
@@ -28,37 +29,61 @@
return http.Redirect(urls.channel_view.build(request, channel=channel.id))
-def channel_view (request, channel, count) :
+def channel_view (request, channel, count, formatter) :
"""
The main channel view page, display the most important info, and all requisite links
"""
-
- if count == 'all' :
- xxx
+
+ # get latest events
+ lines = channel.source.get_latest(count)
- else :
- count = int(count)
+ # format
+ lines = formatter.format_html(lines)
- return templates.render_to_response("channel",
+ return templates.render_to_response("channel_view",
req = request,
channel = channel,
count = count,
- lines = channel.source.get_latest(count),
+ formatter = formatter,
+ lines = lines,
)
- pass
-
def channel_last (request, channel, count, format) :
"""
Display the last x lines of channel messages in various formats
"""
if format == 'txt' :
- return http.Response('\n'.join(channel.source.get_latest(count)), 'text/plain')
-
+ # XXX: formatting
+# return http.Response('\n'.join(str(channel.source.get_latest(count))), 'text/plain')
+ pass
+
else :
raise http.ResponseError("Unknown filetype %r" % format)
+def channel_date (request, channel, date, formatter) :
+ """
+ Display all log data for the given date
+ """
+
+ # XXX: fix date timezone
+ import pytz
+ date = date.replace(tzinfo=pytz.utc)
+
+ # get latest events
+ lines = channel.source.get_date(date)
+
+ # format
+ lines = formatter.format_html(lines)
+
+ return templates.render_to_response("channel_date",
+ req = request,
+ channel = channel,
+ formatter = formatter,
+ date = date,
+ lines = lines,
+ )
+
def channel_search (request, channel, q) :
"""
Display the search form for the channel for GET, or do the search for POST
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/helpers.py Mon Feb 09 00:24:13 2009 +0200
@@ -0,0 +1,22 @@
+"""
+ Some additional helpers
+"""
+
+# "inherit" qmsk.web's helpers
+from qmsk.web.helpers import *
+
+def tz_name (tz) :
+ """
+ Returns a string describing the given timezone
+ """
+
+ return str(tz)
+
+def fmt_date (date) :
+ """
+ Formats a date
+ """
+
+ # XXX: hardcoded
+ return date.strftime('%Y-%m-%d')
+
--- a/log_event.py Sun Feb 08 04:59:22 2009 +0200
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,21 +0,0 @@
-"""
- An IRC logfile consists of a series of events, a.k.a. "lines"
-"""
-
-class LogEvent (object) :
- """
- An event on some specific channel
- """
-
- # the event ype
- type = None
-
- # the UTC timestamp of the event
- timestamp = None
-
- # the event source
- source = None
-
- # associated data (message, etc)
- data = None
-
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/log_formatter.py Mon Feb 09 00:24:13 2009 +0200
@@ -0,0 +1,106 @@
+"""
+ Format LogLines into some other representation
+"""
+
+from qmsk.web import helpers
+
+from log_line import LogTypes
+
+class LogFormatter (object) :
+ """
+ Provides a method to format series of LogLines into various output formats, with varying themes
+ """
+
+ def __init__ (self, tz, timestamp_fmt="%H:%M:%S") :
+ """
+ Initialize to format timestamps with the given timezone and timestamp
+ """
+
+ self.tz = tz
+ self.timestamp_fmt = timestamp_fmt
+
+ def _format_line_text (self, line, template_dict) :
+ """
+ Format the given line as text, using the given { type: string template } dict
+ """
+
+ # look up the template
+ template = template_dict[line.type]
+
+ # build timestamp
+ timestamp = line.timestamp.astimezone(self.tz).strftime(self.timestamp_fmt)
+
+ # format with dict
+ return template % dict(
+ timestamp = timestamp,
+ source = line.source,
+ data = line.data,
+ )
+
+ def format_txt (self, lines) :
+ """
+ Format as plaintext
+ """
+
+ abstract
+
+ def format_html (self, lines) :
+ """
+ Format as HTML
+ """
+
+ abstract
+
+class IrssiTextFormatter (LogFormatter) :
+ """
+ Implements format_txt for irssi-style output
+ """
+
+ # format definitions by type
+ __FMT = {
+ LogTypes.RAW : "%(timestamp)s %(data)s",
+ }
+
+ def format_txt (self, lines) :
+ # ...handle each line
+ for line in lines :
+ # using the __FMT templates
+ yield self._format_line_text(line, self.__FMT)
+
+class IrssiFormatter (IrssiTextFormatter) :
+ """
+ Implements plain black-and-white irssi-style formatting
+ """
+
+ def format_html (self, lines) :
+ """
+ Just uses format_txt, but wraps in <pre></pre>
+ """
+
+ # open pre
+ yield "<pre>"
+
+ # format using IrssiTextFormatter
+ for line in self.format_txt(lines) :
+ # escape HTML
+ yield helpers.escape(line)
+
+ # close pre
+ yield "</pre>"
+
+# define formatters by name
+FORMATTERS = {
+ 'irssi': IrssiFormatter,
+}
+
+def by_name (name) :
+ """
+ Lookup and return a formatter by name
+
+ XXX: uses default timezone/timefmt
+ """
+
+ import pytz
+
+ return FORMATTERS[name](pytz.utc)
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/log_line.py Mon Feb 09 00:24:13 2009 +0200
@@ -0,0 +1,39 @@
+"""
+ An IRC logfile consists of a series of lines/events
+"""
+
+class LogTypes :
+ """
+ Definitions of the various LogLines types
+ """
+
+ # unknown type, may or may not have a timestamp, no source, only data
+ RAW = 0x01
+
+class LogLine (object) :
+ """
+ An event on some specific channel
+ """
+
+ # the event type, as defined in LogTypes
+ type = None
+
+ # the UTC timestamp of the event
+ timestamp = None
+
+ # the event source
+ source = None
+
+ # associated data (message, etc)
+ data = None
+
+ def __init__ (self, type, timestamp, source, data) :
+ """
+ Initialize with given values
+ """
+
+ self.type = type
+ self.timestamp = timestamp
+ self.source = source
+ self.data = data
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/log_parser.py Mon Feb 09 00:24:13 2009 +0200
@@ -0,0 +1,67 @@
+"""
+ Parse log data into LogLines
+"""
+
+import datetime
+
+import log_line
+from log_line import LogTypes
+
+class LogParser (object) :
+ """
+ Abstract interface
+ """
+
+ def __init__ (self, tz, timestamp_fmt="%H:%M:%S") :
+ """
+ Setup the parser to use the given format for line timestamps, which are of the given timezone
+ """
+
+ self.tz = tz
+ self.timestamp_fmt = timestamp_fmt
+
+ def parse_lines (self, lines, date=None) :
+ """
+ Parse the given (iterable) lines of unicode text into LogLines, no trailing newline.
+
+ Giving date lets the parser build full timestamps, otherwise, unless line timestamps have full date
+ information, event timestamps will have a date component of 1900/1/1.
+ """
+
+ abstract
+
+
+class IrssiParser (LogParser) :
+ """
+ A parser for irssi logfiles
+ """
+
+ def parse_lines (self, lines, date=None) :
+ """
+ Parse the given lines, yielding LogLines.
+ """
+
+ for line in lines :
+ # status lines
+ if line.startswith('---') :
+ # XXX: handle these
+ continue
+
+ # normal lines
+ else :
+ # XXX: only parse timestamps for now
+ timestamp, data = line.split(' ', 1)
+
+ # parse timestamp into naive datetime
+ dt = datetime.datetime.strptime(timestamp, self.timestamp_fmt)
+
+ # override date?
+ if date :
+ dt = dt.replace(year=date.year, month=date.month, day=date.day)
+
+ # now localize with timezone
+ dtz = self.tz.localize(dt)
+
+ # yield raw events
+ yield log_line.LogLine(LogTypes.RAW, dtz, None, data)
+
--- a/log_source.py Sun Feb 08 04:59:22 2009 +0200
+++ b/log_source.py Mon Feb 09 00:24:13 2009 +0200
@@ -2,13 +2,10 @@
A source of IRC log files
"""
-import codecs
-from datetime import date, datetime, timedelta
+import datetime, itertools
+import os, errno
import pytz
-# for SEEK_*, errno
-import os, errno
-
class LogSource (object) :
"""
A collection of IRC logs for a specific target in some format. Provides the possibility to read specific events
@@ -20,19 +17,30 @@
"""
abstract
+
+ def get_date (self, dt) :
+ """
+ Get logs for the given date (as a datetime)
+ """
+
+ abstract
class LogFile (LogSource) :
"""
A file containing LogEvents
"""
- def __init__ (self, path, charset='utf-8', sep='\n') :
+ def __init__ (self, path, parser, start_date=None, charset='utf-8', sep='\n') :
"""
- Open the file at the given path, which contains data of the given codec, as lines separated by the given separator
+ Open the file at the given path, which contains data with the given charset, as lines separated by the
+ given separator. Lines are parsed using the given parser, using the given date as an initial date, see
+ LogParser for more info. XXX: currently we assume start_date also for the end of the file
"""
# store
self.path = path
+ self.parser = parser
+ self.start_date = start_date
self.charset = charset
self.sep = sep
@@ -41,22 +49,68 @@
def __iter__ (self) :
"""
- Yields a series of lines, as read from the top of the file
+ Yields a series of unicode lines, as read from the top of the file
"""
# seek to beginning
self.file.seek(0)
- # iterate over lines
- return iter(self.file)
+ # iterate over lines, decoding them as well
+ return (line.decode(self.charset) for line in self.file)
- def get_latest (self, count) :
+ def read_full (self) :
"""
- Returns up to <count> lines from the end of the file, or less, if the file doesn't contain that many lines
+ Reads all LogLines
+ """
+
+ # just use our __iter__
+ return self.parser.parse_lines(self, self.start_date)
+
+ def read_from (self, dt) :
+ """
+ Reads all LogLines from the given timezone-aware timestamp onwards
+ """
+
+ # start reading at beginning
+ events = self.read_full()
+
+ # skip unwanted events
+ for event in events :
+ if event.timestamp < dt :
+ continue
+
+ else :
+ # include this line as well
+ yield event
+ break
+
+ # yield the rest as-is
+ for event in events :
+ yield event
+
+ def read_until (self, dt) :
+ """
+ Reads all LogLines up until the given timezone-aware timestamp
"""
- # the list of lines
- lines = []
+ # start reading events at the beginning
+ events = self.read_full()
+
+ # yield events until we hit the given timestamp
+ for event in events :
+ if event.timestamp <= dt :
+ yield event
+
+ else :
+ break
+
+ # ignore the rest
+ return
+
+ def _read_blocks_reverse (self, blocksize=1024) :
+ """
+ Yields blocks of file data in reverse order, starting at the end of the file
+ """
# seek to end of file
self.file.seek(0, os.SEEK_END)
@@ -64,20 +118,14 @@
# read offset
# XXX: hack -1 to get rid of trailing newline
size = offset = self.file.tell() - 1
-
- # use this blocksize
- BLOCKSIZE = 1024
-
- # trailing data
- buf = ''
-
- # read a block at a time, backwards
- while len(lines) < count and offset > 0:
+
+ # do not try to read past the beginning of the file
+ while offset > 0:
# calc new offset + size
- if offset > BLOCKSIZE :
+ if offset > blocksize :
# full block
- offset -= BLOCKSIZE
- read_size = BLOCKSIZE
+ offset -= blocksize
+ read_size = blocksize
else :
# partial block
@@ -88,47 +136,77 @@
self.file.seek(offset)
# read the data we want
- read_buf = self.file.read(read_size)
- read_len = len(read_buf)
+ block = self.file.read(read_size)
# sanity check
- assert read_len == read_size
+ assert len(block) == read_size
+ # yield
+ yield block
+
+ def _read_lines_reverse (self) :
+ """
+ Yields decoded lines from the end of the file, in reverse order.
+ """
+
+ # partial lines
+ buf = ''
+
+ # read from end of file, a block at a time
+ for block in self._read_blocks_reverse() :
# add in our previous buf
- buf = read_buf + buf
+ buf = block + buf
- # split out lines
- buf_lines = buf.split(self.sep)
+ # split up lines
+ lines = buf.split(self.sep)
# keep the first one as our buffer, as it's incomplete
- buf = buf_lines[0]
+ buf = lines[0]
+
+ # yield the rest a line at a time in reverse order... this looks weird, but that's how slicing works :)
+ # XXX: use something like islice, this has to build a slice object
+ for line in lines[:0:-1] :
+ yield line.decode(self.charset)
- # prepend up to count lines from the end to our lines buffer
- lines = buf_lines[-min(count, len(buf_lines) - 1):] + lines
+ def get_latest (self, count) :
+ """
+ Returns up to count events, from the end of the file, or less, if the file doesn't contain that many lines.
+ """
+
+ # the list of lines
+ lines = []
+
+ # start reading lines into lines
+ for line in self._read_lines_reverse() :
+ # append
+ lines.append(line)
+
+ # done?
+ if len(lines) >= count :
+ break
- # decode
- # XXX: better queue implementation, plz
- lines = [line.decode(self.charset) for line in lines]
-
- # return the line list
- return lines
+ # parse the lines back in file order, using our starting date....
+ # XXX: use lines[::-1] or reversed?
+ # XXX: it may make more sense to parse in reverse order, using 'self.end_date' or something like that
+ return self.parser.parse_lines(reversed(lines), self.start_date)
class LogDirectory (LogSource) :
"""
A directory containing a series of timestamped LogFiles
"""
- def __init__ (self, path, tz, charset='utf-8', filename_fmt='%Y-%m-%d') :
+ def __init__ (self, path, tz, parser, charset='utf-8', filename_fmt='%Y-%m-%d') :
"""
Load the logfiles at the given path.
The files contain data in the given charset, and are named according the the date in the given timezone and
- date format.
+ date format, and will be parsed using the given parser.
"""
# store
self.path = path
self.tz = tz
+ self.parser = parser
self.charset = charset
self.filename_fmt = filename_fmt
@@ -155,9 +233,9 @@
path = os.path.join(self.path, filename)
# return the LogFile
- return LogFile(path, self.charset)
+ return LogFile(path, self.parser, d, self.charset)
- def _iter_backwards (self, dt=None) :
+ def _iter_date_reverse (self, dt=None) :
"""
Yields an infinite series of naive date objects in our timezone, iterating backwards in time starting at the
given *datetime*, or the the current date, if none given
@@ -165,13 +243,13 @@
# default to now
if not dt :
- dt = datetime.now(pytz.utc)
+ dt = datetime.datetime.now(pytz.utc)
# convert to target timezone
dtz = dt.astimezone(self.tz)
# our timedelta
- ONE_DAY = timedelta(1)
+ ONE_DAY = datetime.timedelta(1)
# iterate unto infinity
while True :
@@ -187,7 +265,7 @@
"""
# iterate backwards from now
- day_iter = self._iter_backwards()
+ day_iter = self._iter_date_reverse()
# number of files read
files = 0
@@ -195,7 +273,7 @@
# only read up to 100 files or so
MAX_FILES = 100
- # read the lines into here
+ # read the events into here
lines = []
# loop until done
@@ -221,9 +299,37 @@
# skip to next day
continue
- # read the lines
- lines = logfile.get_latest(count) + lines
+ # read the events
+ # XXX: use a queue
+ lines = list(logfile.get_latest(count)) + lines
- # return the lines
+ # return the events
return lines
+ def get_date (self, dt) :
+ """
+ A 'day' is considered to be a 24-hour period from 00:00:00 to 23:59:59. If the timezone of the given datetime
+ differs from our native timezone, this may involve lines from more than one logfile.
+ """
+
+ # begin/end of 24h period, in target timezone
+ dtz_begin = dt.replace(hour=0, minute=0, second=0).astimezone(self.tz)
+ dtz_end = dt.replace(hour=23, minute=59, second=59, microsecond=999999).astimezone(self.tz)
+
+ # as dates
+ d_begin = dtz_begin.date()
+ d_end = dtz_end.date()
+
+ # if they're the same, just pull the full log for that date
+ if d_begin == d_end :
+ return self._get_logfile_date(d_begin).read_full()
+
+ # otherwise, we need to pull two partial logs
+ else :
+ # open both of them
+ f_begin = self._get_logfile_date(d_begin)
+ f_end = self._get_logfile_date(d_end)
+
+ # chain together the two sources
+ return itertools.chain(f_begin.read_from(dtz_begin), f_end.read_until(dtz_end))
+
--- a/static/irclogs.css Sun Feb 08 04:59:22 2009 +0200
+++ b/static/irclogs.css Mon Feb 09 00:24:13 2009 +0200
@@ -51,7 +51,7 @@
height: 1.5em;
}
-#menu li:first {
+#menu li:first-child {
border-left: none;
}
@@ -93,10 +93,6 @@
background-color: #d0d0d0;
}
-#menu form option {
- background-color: auto;
-}
-
/*
* Content
*/
@@ -108,8 +104,7 @@
* Footer
*/
div#footer {
- width: 100%;
- padding: 10px 0px 10px;
+ padding: 10px;
border-top: 1px dashed #a5a5a5;
--- a/templates/channel.tmpl Sun Feb 08 04:59:22 2009 +0200
+++ b/templates/channel.tmpl Mon Feb 09 00:24:13 2009 +0200
@@ -45,11 +45,8 @@
</ul>
</%def>
-<h1>${channel.title} » Last ${count} lines</h1>
+${next.body()}
-<pre>
-% for line in lines :
-${line | h}
-% endfor
-</pre>
-
+<%def name="footer_right()">
+ All times are in <strong>${h.tz_name(formatter.tz)}</strong>
+</%def>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/templates/channel_date.tmpl Mon Feb 09 00:24:13 2009 +0200
@@ -0,0 +1,9 @@
+<%inherit file="channel.tmpl" />
+
+<h1>${channel.title} » Logs for ${h.fmt_date(date)}</h1>
+
+% for line in lines :
+${line}\
+% endfor
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/templates/channel_view.tmpl Mon Feb 09 00:24:13 2009 +0200
@@ -0,0 +1,9 @@
+<%inherit file="channel.tmpl" />
+
+<h1>${channel.title} » Last ${count} lines</h1>
+
+% for line in lines :
+${line}
+% endfor
+
+
--- a/templates/layout.tmpl Sun Feb 08 04:59:22 2009 +0200
+++ b/templates/layout.tmpl Mon Feb 09 00:24:13 2009 +0200
@@ -4,6 +4,10 @@
</%def>
+<%def name="footer_right()">
+
+</%def>
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
<head>
<title>irclogs.qmsk.net${(' :: ' + channel.title) if channel else ''}</title>
@@ -11,7 +15,7 @@
</head>
<body>
<div id="menu">
- ${self.menu()}
+ ${next.menu()}
</div>
<div id="content">
@@ -19,11 +23,16 @@
</div>
<div id="footer">
- <div id="footer-left">
+ <div id="footer-right">
+ ${next.footer_right()}
</div>
-
+
+ <div id="footer-left">
+
+ </div>
+
<div id="footer-center">
- <!-- ${h.validation_notice(req.site_host)} -->
+ ${h.validation_notice(req.site_host)}
</div>
</div>
</body>
--- a/urls.py Sun Feb 08 04:59:22 2009 +0200
+++ b/urls.py Mon Feb 09 00:24:13 2009 +0200
@@ -4,35 +4,32 @@
"""
# urltree stuff
-from qmsk.web.urltree import URLConfig, URL, URLTree
+from qmsk.web import urltree
# our own handlers
import handlers
# for types
-import channels
+import channels, log_formatter, utils
# our URLConfig
-url_config = URLConfig(
+urls = url = urltree.URLConfig(
type_dict = {
# lookup LogChannel
- 'cid': channels.channel_list.lookup
+ 'cid': channels.channel_list.lookup,
+ 'fmt': log_formatter.by_name,
+ 'date': utils.Date(None, '%Y-%m-%d'),
}
)
-# shortcut for building an URL with our url_config
-def url (*args, **kwargs) :
- return URL(url_config, *args, **kwargs)
-
# urls
index = url('/', handlers.index )
channel_select = url('/channel_select/?channel:cid', handlers.channel_select )
-channel_view = url('/channels/{channel:cid}/?count:str=10', handlers.channel_view )
+channel_view = url('/channels/{channel:cid}/?count:int=10&formatter:fmt=irssi', handlers.channel_view )
channel_last = url('/channels/{channel:cid}/last/{count:int=100}/{format=html}', handlers.channel_last )
-channel_search = url('/channels/{channel:cid}/search/?q', handlers.channel_search )
+channel_date = url('/channels/{channel:cid}/date/{date:date}/?formatter:fmt=irssi', handlers.channel_date )
+channel_search = url('/channels/{channel:cid}/search/?q', handlers.channel_search )
# mapper
-mapper = URLTree(
- [index, channel_select, channel_view, channel_last, channel_search]
-)
+mapper = urltree.URLTree(urls)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/utils.py Mon Feb 09 00:24:13 2009 +0200
@@ -0,0 +1,28 @@
+"""
+ Miscellaneous things
+"""
+
+import datetime
+
+class Date (object) :
+ """
+ Handle dates in URLs as datetime objects (with indeterminate time info) in some timezone
+ """
+
+ def __init__ (self, tz, date_fmt="%Y-%m-%d") :
+ """
+ Format/parse dates in the given timezone using the given format
+ """
+
+ self.tz = tz
+ self.date_fmt = date_fmt
+
+ __name__ = "date"
+
+ def __call__ (self, date_str) :
+ """
+ Parse the given date string
+ """
+
+ return datetime.datetime.strptime(date_str, self.date_fmt).replace(tzinfo=self.tz)
+