log_source.py
changeset 82 afd3120ec71e
parent 81 745032a57803
child 83 a34e9f56ddda
equal deleted inserted replaced
81:745032a57803 82:afd3120ec71e
     4 
     4 
     5 import datetime, calendar, itertools, functools, math
     5 import datetime, calendar, itertools, functools, math
     6 import os, errno
     6 import os, errno
     7 import pytz
     7 import pytz
     8 
     8 
       
     9 import config
       
    10 
       
    11 class LogSourceDecoder (object) :
       
    12     """
       
    13         Handles decoding of LogSource lines
       
    14     """
       
    15 
       
    16     def __init__ (self, encoding_list) :
       
    17         """
       
    18             Will try each of the given (charset, errors) items in turn, until one succeeds
       
    19         """
       
    20 
       
    21         self.encoding_list = encoding_list
       
    22     
       
    23     def decode (self, line) :
       
    24         """
       
    25             Decode the line of str() text into an unicode object
       
    26         """
       
    27         
       
    28         # list of errors encountered
       
    29         error_list = []
       
    30         
       
    31         # try each in turn
       
    32         for charset, errors in self.encoding_list :
       
    33             # trap UnicodeDecodeError to try with the next one
       
    34             try :
       
    35                 return line.decode(charset, errors)
       
    36 
       
    37             except UnicodeDecodeError, e :
       
    38                 error_list.append("%s:%s - %s" % (charset, errors, e))
       
    39                 continue
       
    40 
       
    41         # failure
       
    42         raise UnicodeDecodeError("Failed to decode line: %r: %s" % (line, ', '.join(error_list)))
       
    43 
     9 class LogSource (object) :
    44 class LogSource (object) :
    10     """
    45     """
    11         A collection of IRC logs for a specific target in some format. Provides the possibility to read specific events
    46         A collection of IRC logs for a specific target in some format. Provides the possibility to read specific events
    12     """
    47     """
       
    48     
       
    49     def __init__ (self, decoder) :
       
    50         """
       
    51             Use the given LogSourceDecoder
       
    52         """
       
    53 
       
    54         self.decoder = decoder
    13     
    55     
    14     def get_latest (self, count) :
    56     def get_latest (self, count) :
    15         """
    57         """
    16             Yield the latest events, up to `count` of them.
    58             Yield the latest events, up to `count` of them.
    17         """
    59         """
   104         """
   146         """
   105             Get a set of dates, telling which days in the given month (as a datetime) have logs available
   147             Get a set of dates, telling which days in the given month (as a datetime) have logs available
   106         """
   148         """
   107 
   149 
   108         abstract
   150         abstract
   109  
   151 
   110 class LogFile (object) :
   152 class LogFile (object) :
   111     """
   153     """
   112         A file containing LogEvents
   154         A file containing LogEvents
   113 
   155 
   114         XXX: modify to implement LogSource?
   156         XXX: modify to implement LogSource?
   115     """
   157     """
   116 
   158 
   117     def __init__ (self, path, parser, charset, start_date=None, sep='\n') :
   159     def __init__ (self, path, parser, decoder, start_date=None, sep='\n') :
   118         """
   160         """
   119             Open the file at the given path, which contains data with the given charset, as lines separated by the
   161             Open the file at the given path, which contains lines as separated by the given separator. Lines are
   120             given separator. Lines are parsed using the given parser, using the given date as an initial date, see
   162             decoded using the given LogSourceDecoder, and then parsed using the given parser, using the given date
   121             LogParser for more info. XXX: currently we assume start_date also for the end of the file
   163             as the initial date for this log's first line.
       
   164             
       
   165             XXX: currently we assume start_date also for the end of the file
   122         """
   166         """
   123         
   167         
   124         # store
   168         # store
   125         self.path = path
   169         self.path = path
   126         self.parser = parser
   170         self.parser = parser
   127         self.start_date = start_date
   171         self.start_date = start_date
   128         self.charset = charset
   172         self.decoder = decoder
   129         self.sep = sep
   173         self.sep = sep
   130 
   174 
   131         # open
   175         # open
   132         self.file = open(path, 'rb')
   176         self.file = open(path, 'rb')
   133 
   177 
   138         
   182         
   139         # seek to beginning
   183         # seek to beginning
   140         self.file.seek(0)
   184         self.file.seek(0)
   141 
   185 
   142         # iterate over lines, decoding them as well
   186         # iterate over lines, decoding them as well
   143         return (line.decode(self.charset).rstrip(self.sep) for line in self.file)
   187         return (self.decoder.decode(line.rstrip(self.sep)) for line in self.file)
   144     
   188     
   145     def read_full (self) :
   189     def read_full (self) :
   146         """
   190         """
   147             Reads all LogLines. The LogLines will have a valid offset.
   191             Reads all LogLines. The LogLines will have a valid offset.
   148         """
   192         """
   277 class LogDirectory (LogSource) :
   321 class LogDirectory (LogSource) :
   278     """
   322     """
   279         A directory containing a series of timestamped LogFiles
   323         A directory containing a series of timestamped LogFiles
   280     """
   324     """
   281 
   325 
   282     def __init__ (self, path, tz, parser, charset, filename_fmt) :
   326     def __init__ (self, path, tz, parser, decoder, filename_fmt) :
   283         """
   327         """
   284             Load the logfiles at the given path.
   328             Load the logfiles at the given path.
   285             
   329             
   286             The files contain data in the given charset, and are named according the the date in the given timezone and
    330             Decode the file lines using the given decoder, the files are named according to the date in the given
   287             date format, and will be parsed using the given parser.
   331             timezone and date format, and will be parsed using the given parser.
   288         """
   332         """
   289 
   333 
   290         # store
   334         # store
   291         self.path = path
   335         self.path = path
   292         self.tz = tz
   336         self.tz = tz
   293         self.parser = parser
   337         self.parser = parser
   294         self.charset = charset
   338         self.decoder = decoder
   295         self.filename_fmt = filename_fmt
   339         self.filename_fmt = filename_fmt
   296 
   340 
   297     def _get_logfile_datetime (self, dt) :
   341     def _get_logfile_datetime (self, dt) :
   298         """
   342         """
   299             Get the logfile corresponding to the given datetime
   343             Get the logfile corresponding to the given datetime
   320         path = os.path.join(self.path, filename)
   364         path = os.path.join(self.path, filename)
   321         
   365         
   322         try :
   366         try :
   323             if load :
   367             if load :
   324                 # open+return the LogFile
   368                 # open+return the LogFile
   325                 return LogFile(path, self.parser, self.charset, d)
   369                 return LogFile(path, self.parser, self.decoder, d)
   326             
   370             
   327             else :
   371             else :
   328                 # test
   372                 # test
   329                 return os.path.exists(path)
   373                 return os.path.exists(path)
   330 
   374