4 |
4 |
5 import datetime, calendar, itertools, functools, math |
5 import datetime, calendar, itertools, functools, math |
6 import os, errno |
6 import os, errno |
7 import pytz |
7 import pytz |
8 |
8 |
|
9 import config |
|
10 |
|
class LogSourceDecoder (object) :
    """
        Decodes raw LogSource lines into unicode text, trying a list of encodings in order.
    """

    def __init__ (self, encoding_list) :
        """
            Store the (charset, errors) items to try when decoding.

            Each item is passed as-is to `line.decode(charset, errors)`. The items are tried in the given order
            until one of them succeeds.
        """

        self.encoding_list = encoding_list

    def decode (self, line) :
        """
            Decode the given line of encoded bytes into a unicode object.

            Returns the result of the first (charset, errors) item that decodes the line without error.

            Raises UnicodeDecodeError if every item fails, or if no items are configured. The exception's
            `object` is the undecodable line, and its `reason` lists the failure of each attempt.
        """

        # human-readable description of each failed attempt
        error_list = []

        # charsets attempted so far, reported as the `encoding` of the final error
        tried = []

        # most recent failure; kept in its own name because `e` does not outlive the except block on Python 3
        last_error = None

        # try each in turn
        for charset, errors in self.encoding_list :
            try :
                return line.decode(charset, errors)

            except UnicodeDecodeError as e :
                # remember the failure and fall through to the next item
                error_list.append("%s:%s - %s" % (charset, errors, e))
                tried.append(str(charset))
                last_error = e

        # failure: report the position of the last failed attempt, or the whole line if nothing was tried
        if last_error is not None :
            start, end = last_error.start, last_error.end

        else :
            start, end = 0, len(line)

        # UnicodeDecodeError requires exactly (encoding, object, start, end, reason); constructing it from a
        # single message string raises TypeError instead of the intended exception
        raise UnicodeDecodeError(
            '/'.join(tried) or 'none',
            line,
            start,
            end,
            "Failed to decode line: %r: %s" % (line, ', '.join(error_list)),
        )
|
43 |
9 class LogSource (object) : |
44 class LogSource (object) : |
10 """ |
45 """ |
11 A collection of IRC logs for a specific target in some format. Provides the possibility to read specific events |
46 A collection of IRC logs for a specific target in some format. Provides the possibility to read specific events |
12 """ |
47 """ |
|
48 |
|
49 def __init__ (self, decoder) : |
|
50 """ |
|
51 Use the given LogSourceDecoder |
|
52 """ |
|
53 |
|
54 self.decoder = decoder |
13 |
55 |
14 def get_latest (self, count) : |
56 def get_latest (self, count) : |
15 """ |
57 """ |
16 Yield the latest events, up to `count` of them. |
58 Yield the latest events, up to `count` of them. |
17 """ |
59 """ |
104 """ |
146 """ |
105 Get a set of dates, telling which days in the given month (as a datetime) have logs available |
147 Get a set of dates, telling which days in the given month (as a datetime) have logs available |
106 """ |
148 """ |
107 |
149 |
108 abstract |
150 abstract |
109 |
151 |
110 class LogFile (object) : |
152 class LogFile (object) : |
111 """ |
153 """ |
112 A file containing LogEvents |
154 A file containing LogEvents |
113 |
155 |
114 XXX: modify to implement LogSource? |
156 XXX: modify to implement LogSource? |
115 """ |
157 """ |
116 |
158 |
117 def __init__ (self, path, parser, charset, start_date=None, sep='\n') : |
159 def __init__ (self, path, parser, decoder, start_date=None, sep='\n') : |
118 """ |
160 """ |
119 Open the file at the given path, which contains data with the given charset, as lines separated by the |
161 Open the file at the given path, which contains lines as separated by the given separator. Lines are |
120 given separator. Lines are parsed using the given parser, using the given date as an initial date, see |
162 decoded using the given LogSourceDecoder, and then parsed using the given parser, using the given date |
121 LogParser for more info. XXX: currently we assume start_date also for the end of the file |
163 as the initial date for this log's first line. |
|
164 |
|
165 XXX: currently we assume start_date also for the end of the file |
122 """ |
166 """ |
123 |
167 |
124 # store |
168 # store |
125 self.path = path |
169 self.path = path |
126 self.parser = parser |
170 self.parser = parser |
127 self.start_date = start_date |
171 self.start_date = start_date |
128 self.charset = charset |
172 self.decoder = decoder |
129 self.sep = sep |
173 self.sep = sep |
130 |
174 |
131 # open |
175 # open |
132 self.file = open(path, 'rb') |
176 self.file = open(path, 'rb') |
133 |
177 |
277 class LogDirectory (LogSource) : |
321 class LogDirectory (LogSource) : |
278 """ |
322 """ |
279 A directory containing a series of timestamped LogFiles |
323 A directory containing a series of timestamped LogFiles |
280 """ |
324 """ |
281 |
325 |
def __init__ (self, path, tz, parser, decoder, filename_fmt) :
    """
        Load the logfiles at the given path.

        The file lines are decoded using the given decoder. The files are named according to the date in the
        given timezone and date format, and will be parsed using the given parser.
    """

    # let the base class store the decoder, so that LogSource's own setup is not bypassed
    LogSource.__init__(self, decoder)

    # store
    self.path = path
    self.tz = tz
    self.parser = parser
    self.filename_fmt = filename_fmt
296 |
340 |
297 def _get_logfile_datetime (self, dt) : |
341 def _get_logfile_datetime (self, dt) : |
298 """ |
342 """ |
299 Get the logfile corresponding to the given datetime |
343 Get the logfile corresponding to the given datetime |
320 path = os.path.join(self.path, filename) |
364 path = os.path.join(self.path, filename) |
321 |
365 |
322 try : |
366 try : |
323 if load : |
367 if load : |
324 # open+return the LogFile |
368 # open+return the LogFile |
325 return LogFile(path, self.parser, self.charset, d) |
369 return LogFile(path, self.parser, self.decoder, d) |
326 |
370 |
327 else : |
371 else : |
328 # test |
372 # test |
329 return os.path.exists(path) |
373 return os.path.exists(path) |
330 |
374 |