1 """ |
1 """ |
2 A source of IRC log files |
2 A source of IRC log files |
3 """ |
3 """ |
4 |
4 |
5 import datetime, calendar, itertools, functools, math |
5 import datetime, calendar, itertools, functools, math |
6 import os, errno |
6 import os, os.path, errno |
7 import pytz |
7 import pytz |
8 |
8 |
9 import config |
9 import config, utils |
10 |
10 |
11 class LogSourceDecoder (object) : |
11 class LogSourceDecoder (object) : |
12 """ |
12 """ |
13 Handles decoding of LogSource lines |
13 Handles decoding of LogSource lines |
14 """ |
14 """ |
159 """ |
159 """ |
160 Return a sequence of dates, telling which days in the given month (as a datetime) have logs available |
160 Return a sequence of dates, telling which days in the given month (as a datetime) have logs available |
161 """ |
161 """ |
162 |
162 |
163 abstract |
163 abstract |
|
164 |
|
def get_modified (self, dt=None) :
    """
        Abstract hook: the LogLines that may have been *modified* since `dt`.

        dt      - datetime to compare against; when omitted, *all* lines are to be returned

        Implementations should produce the LogLines in time order.
    """

    # NOTE(review): `abstract` is not defined anywhere in the visible part of this file; if it really is undefined,
    # evaluating it raises NameError, which presumably serves as the "must be overridden" marker - confirm.
    abstract
164 |
175 |
165 class LogFile (object) : |
176 class LogFile (object) : |
166 """ |
177 """ |
167 A file containing LogEvents |
178 A file containing LogEvents |
168 |
179 |
362 dtz = dt.astimezone(self.tz) |
373 dtz = dt.astimezone(self.tz) |
363 |
374 |
364 # convert to date and use that |
375 # convert to date and use that |
365 return self._get_logfile_date(dtz.date()) |
376 return self._get_logfile_date(dtz.date()) |
366 |
377 |
367 def _get_logfile_date (self, d, load=True) : |
378 def _get_logfile_date (self, d, load=True, stat=True, ignore_missing=True) : |
368 """ |
379 """ |
369 Get the logfile corresponding to the given naive date in our timezone. If load is False, only test for the |
380 Get the logfile corresponding to the given naive date in our timezone. |
370 presence of the logfile, do not actually open it. |
381 |
371 |
382 If load is False, only test for the presence of the logfile, do not actually open it. If stat is given, |
372 Returns None if the logfile does not exist. |
383 then this returns the stat() result |
|
384 |
|
385 Returns None if the logfile does not exist, unless ignore_missing is given as False. |
373 """ |
386 """ |
374 |
387 |
375 # format filename |
388 # format filename |
376 filename = d.strftime(self.filename_fmt) |
389 filename = d.strftime(self.filename_fmt) |
377 |
390 |
381 try : |
394 try : |
382 if load : |
395 if load : |
383 # open+return the LogFile |
396 # open+return the LogFile |
384 return LogFile(path, self.parser, self.decoder, start_date=d, channel=self.channel) |
397 return LogFile(path, self.parser, self.decoder, start_date=d, channel=self.channel) |
385 |
398 |
|
399 elif stat : |
|
400 # stat |
|
401 return os.stat(path) |
|
402 |
386 else : |
403 else : |
387 # test |
404 # test |
388 return os.path.exists(path) |
405 return os.path.exists(path) |
389 |
406 |
390 # XXX: move to LogFile |
407 # XXX: move to LogFile |
391 except IOError, e : |
408 except IOError, e : |
392 # return None for missing files |
409 # return None for missing files |
393 if e.errno == errno.ENOENT : |
410 if e.errno == errno.ENOENT and ignore_missing : |
394 return None |
411 return None |
395 |
412 |
396 else : |
413 else : |
397 raise |
414 raise |
398 |
415 |
|
416 def _iter_logfile_dates (self) : |
|
417 """ |
|
418 Yields a series of naive datetime objects representing the logfiles that are available, in time order |
|
419 """ |
|
420 |
|
421 # listdir |
|
422 filenames = os.listdir(self.path) |
|
423 |
|
424 # sort |
|
425 filenames.sort() |
|
426 |
|
427 # iter files |
|
428 for filename in filenames : |
|
429 try : |
|
430 # parse date + yield |
|
431 yield datetime.datetime.strptime(filename, self.filename_fmt).replace(tzinfo=self.tz) |
|
432 |
|
433 except : |
|
434 # ignore |
|
435 continue |
|
436 |
399 def _iter_date_reverse (self, dt=None) : |
437 def _iter_date_reverse (self, dt=None) : |
400 """ |
438 """ |
401 Yields an infinite series of naive date objects in our timezone, iterating backwards in time starting at the |
439 Yields an infinite series of naive date objects in our timezone, iterating backwards in time starting at the |
402 given *datetime*, or the the current date, if none given |
440 given *datetime*, or the the current date, if none given |
403 """ |
441 """ |
564 # test for it |
602 # test for it |
565 if self._get_logfile_date(log_date, load=False) : |
603 if self._get_logfile_date(log_date, load=False) : |
566 # valid |
604 # valid |
567 yield dt.date() |
605 yield dt.date() |
568 |
606 |
|
def get_modified (self, dt=None) :
    """
        Yield the lines of every logfile that may have been modified since the given datetime.

        dt      - datetime to compare the logfiles' mtimes against; a logfile is skipped when its mtime is older
                  than this. If not given, the lines of *all* logfiles are yielded.

        Logfiles are visited in the order given by self._iter_logfile_dates(), and each selected file is read in
        full via read_full(). A logfile that is missing when it is opened raises, as it is opened with
        ignore_missing=False.
    """

    # iterate through all available logfiles, as datetimes
    for log_date in self._iter_logfile_dates() :
        if dt is not None :
            # stat
            st = self._get_logfile_date(log_date, load=False, stat=True)

            if st is None :
                # _get_logfile_date returns None for a logfile that does not exist, so there is nothing to report
                continue

            # not modified?
            if utils.from_utc_timestamp(st.st_mtime) < dt :
                continue

        # open
        logfile = self._get_logfile_date(log_date, ignore_missing=False)

        # yield all lines
        for line in logfile.read_full() :
            yield line
|
630 |