--- a/log_source.py Tue Feb 10 03:48:51 2009 +0200
+++ b/log_source.py Tue Feb 10 04:27:22 2009 +0200
@@ -6,11 +6,53 @@
import os, errno
import pytz
+import config
+
+class LogSourceDecoder (object) :
+    """
+    Decodes raw LogSource lines (byte strings) into unicode objects, trying a list of encodings in turn
+    """
+
+    def __init__ (self, encoding_list) :
+        """
+        Will try each of the given (charset, errors) items in turn, until one succeeds
+        """
+
+        self.encoding_list = encoding_list
+
+    def decode (self, line) :
+        """
+        Decode the line of str() text into a unicode object, using the first (charset, errors) item that works.
+        Raises UnicodeDecodeError, listing every failed attempt in its reason, if none of them succeeds.
+        """
+
+        # one "charset:errors - message" entry per failed attempt
+        error_list = []
+
+        # try each in turn, trapping UnicodeDecodeError to move on to the next one
+        for charset, errors in self.encoding_list :
+            try :
+                return line.decode(charset, errors)
+            except UnicodeDecodeError, e :
+                error_list.append("%s:%s - %s" % (charset, errors, e))
+
+        # failure: UnicodeDecodeError only accepts (encoding, object, start, end, reason), never a lone message
+        charsets = ', '.join(charset for charset, errors in self.encoding_list)
+        reason = "Failed to decode line: %r: %s" % (line, ', '.join(error_list))
+        raise UnicodeDecodeError(charsets, line, 0, len(line), reason)
+
class LogSource (object) :
"""
A collection of IRC logs for a specific target in some format. Provides the possibility to read specific events
"""
+ def __init__ (self, decoder) :
+ """
+ Store the given LogSourceDecoder as self.decoder
+ """
+
+ self.decoder = decoder
+
def get_latest (self, count) :
"""
Yield the latest events, up to `count` of them.
@@ -106,7 +148,7 @@
"""
abstract
-
+
class LogFile (object) :
"""
A file containing LogEvents
@@ -114,18 +156,20 @@
XXX: modify to implement LogSource?
"""
- def __init__ (self, path, parser, charset, start_date=None, sep='\n') :
+ def __init__ (self, path, parser, decoder, start_date=None, sep='\n') :
"""
- Open the file at the given path, which contains data with the given charset, as lines separated by the
- given separator. Lines are parsed using the given parser, using the given date as an initial date, see
- LogParser for more info. XXX: currently we assume start_date also for the end of the file
+ Open the file at the given path, which contains lines separated by the given separator. Lines are
+ decoded using the given LogSourceDecoder, and then parsed using the given parser, using the given date
+ as the initial date for this log's first line.
+
+ XXX: currently we assume start_date also for the end of the file
"""
# store
self.path = path
self.parser = parser
self.start_date = start_date
- self.charset = charset
+ self.decoder = decoder
self.sep = sep
# open
@@ -140,7 +184,7 @@
self.file.seek(0)
# iterate over lines, decoding them as well
- return (line.decode(self.charset).rstrip(self.sep) for line in self.file)
+ return (self.decoder.decode(line.rstrip(self.sep)) for line in self.file)
def read_full (self) :
"""
@@ -279,19 +323,19 @@
A directory containing a series of timestamped LogFiles
"""
- def __init__ (self, path, tz, parser, charset, filename_fmt) :
+ def __init__ (self, path, tz, parser, decoder, filename_fmt) :
"""
Load the logfiles at the given path.
- The files contain data in the given charset, and are named according the the date in the given timezone and
- date format, and will be parsed using the given parser.
+ Decode the file lines using the given decoder. The files are named according to the date in the given
+ timezone and date format, and will be parsed using the given parser.
"""
# store
self.path = path
self.tz = tz
self.parser = parser
- self.charset = charset
+ self.decoder = decoder
self.filename_fmt = filename_fmt
def _get_logfile_datetime (self, dt) :
@@ -322,7 +366,7 @@
try :
if load :
# open+return the LogFile
- return LogFile(path, self.parser, self.charset, d)
+ return LogFile(path, self.parser, self.decoder, d)
else :
# test