log_source.py
changeset 115 751e3fcd11d2
parent 112 090192b64d7e
equal deleted inserted replaced
114:d4848d807fd1 115:751e3fcd11d2
   382         self.tz = tz
   382         self.tz = tz
   383         self.parser = parser
   383         self.parser = parser
   384         self.decoder = decoder
   384         self.decoder = decoder
   385         self.filename_fmt = filename_fmt
   385         self.filename_fmt = filename_fmt
   386 
   386 
   387     def _get_logfile_datetime (self, dt) :
   387     def _get_logfile_date (self, d, load=True, mtime=False, ignore_missing=False) :
   388         """
       
   389             Get the logfile corresponding to the given datetime
       
   390         """
       
   391 
       
   392         # convert to target timezone
       
   393         dtz = dt.astimezone(self.tz)
       
   394         
       
   395         # convert to date and use that
       
   396         return self._get_logfile_date(dtz.date())
       
   397 
       
   398     def _get_logfile_date (self, d, load=True, mtime=False, ignore_missing=True) :
       
   399         """
   388         """
   400             Get the logfile corresponding to the given naive date in our timezone. 
   389             Get the logfile corresponding to the given naive date in our timezone. 
   401             
   390             
   402             If load is False, only test for the presence of the logfile, do not actually open it. If mtime is given,
   391             If load is False, only test for the presence of the logfile, do not actually open it. If mtime is given,
   403             then this returns the file's mtime
   392             then this returns the file's mtime
   441                 after   only dates from said date onwards will be returned
   430                 after   only dates from said date onwards will be returned
   442                 until   only dates up to and including said date will be returned
   431                 until   only dates up to and including said date will be returned
   443                 reverse the dates are returned in reverse order instead. Note that the meaning of after/until doesn't change
   432                 reverse the dates are returned in reverse order instead. Note that the meaning of after/until doesn't change
   444         """
   433         """
   445 
   434 
       
   435         # convert timestamps to our timezone's dates
       
   436         if after :
       
   437             after = after.astimezone(self.tz).date()
       
   438 
       
   439         if until :
       
   440             until = until.astimezone(self.tz).date()
       
   441 
   446         # listdir
   442         # listdir
   447         filenames = os.listdir(self.path)
   443         filenames = os.listdir(self.path)
   448 
   444 
   449         # sort
   445         # sort
   450         filenames.sort(reverse=reverse)
   446         filenames.sort(reverse=reverse)
   451 
   447 
   452         # iter files
   448         # iter files
   453         for filename in filenames :
   449         for filename in filenames :
   454             try :
   450             try :
   455                 # parse date
   451                 # parse date
   456                 date = self.tz.localize(datetime.datetime.strptime(filename, self.filename_fmt))
   452                 dt = self.tz.localize(datetime.datetime.strptime(filename, self.filename_fmt))
       
   453                 date = dt.date()
   457             
   454             
   458             except :
   455             except :
   459                 # ignore
   456                 # ignore
   460                 continue
   457                 continue
   461 
   458 
   463                 if (after and date < after) or (until and date > until) :
   460                 if (after and date < after) or (until and date > until) :
   464                     # ignore
   461                     # ignore
   465                     continue
   462                     continue
   466                 
   463                 
   467                 else :
   464                 else :
   468 #                    print
       
   469 #                    print "iter_logfile_dates: after=%s, until=%s, reverse=%s -> %s" % (after, until, reverse, date)
       
   470 
       
   471                     # yield
   465                     # yield
   472                     yield date
   466                     yield dt
   473             
   467             
   474     def _iter_date_reverse (self, dt=None) :
   468     def _iter_date_reverse (self, dt=None) :
   475         """
   469         """
   476             Yields an infinite series of naive date objects in our timezone, iterating backwards in time starting at the
   470             Yields an infinite series of naive date objects in our timezone, iterating backwards in time starting at the
   477             given *datetime*, or the current date, if none given
   471             given *datetime*, or the current date, if none given
   515             
   509             
   516             # try and open the next logfile
   510             # try and open the next logfile
   517             logfile = None
   511             logfile = None
   518             
   512             
   519             file_count += 1
   513             file_count += 1
   520             logfile = self._get_logfile_date(day)
   514             logfile = self._get_logfile_date(day, ignore_missing=True)
   521             
   515             
   522             # no logfile there?
   516             # no logfile there?
   523             if not logfile :
   517             if not logfile :
   524                 # hit our limit?
   518                 # hit our limit?
   525                 if file_count > max_files :
   519                 if file_count > max_files :
   584         # if they're the same, just pull the full log for that date
   578         # if they're the same, just pull the full log for that date
   585         if d_begin == d_end :
   579         if d_begin == d_end :
   586             # open that log
   580             # open that log
   587             logfile = self._get_logfile_date(d_begin)
   581             logfile = self._get_logfile_date(d_begin)
   588             
   582             
   589             if not logfile :
       
   590                 raise Exception("No logfile for date=%r" % (dt, ))
       
   591             
       
   592             # return the full data
   583             # return the full data
   593             return logfile.read_full()
   584             return logfile.read_full()
   594         
   585         
   595         # otherwise, we need to pull two partial logs
   586         # otherwise, we need to pull two partial logs
   596         else :
   587         else :
   597             # open both of them
   588             # open both of them, but it's okay if we don't have the second one
   598             f_begin = self._get_logfile_date(d_begin)
   589             f_begin = self._get_logfile_date(d_begin)
   599             f_end = self._get_logfile_date(d_end)
   590             f_end = self._get_logfile_date(d_end, ignore_missing=True)
   600             
   591 
   601             # chain together the two sources
   592             # chain together the two sources
   602             return itertools.chain(
   593             return itertools.chain(
   603                 f_begin.read_from(dtz_begin), 
   594                 f_begin.read_from(dtz_begin), 
   604                 f_end.read_until(dtz_end) if f_end else []
   595                 f_end.read_until(dtz_end) if f_end else []
   605             )
   596             )
   632         for dt in self._iter_month_days(month) :
   623         for dt in self._iter_month_days(month) :
   633             # date in our target timezone
   624             # date in our target timezone
   634             log_date = dt.astimezone(self.tz).date()
   625             log_date = dt.astimezone(self.tz).date()
   635             
   626             
   636             # test for it
   627             # test for it
   637             if self._get_logfile_date(log_date, load=False) :
   628             if self._get_logfile_date(log_date, load=False, ignore_missing=True) :
   638                 # valid
   629                 # valid
   639                 yield dt.date()
   630                 yield dt.date()
   640 
   631 
   641     def get_modified (self, dt=None, after=None, until=None) :
   632     def get_modified (self, dt=None, after=None, until=None) :
   642         """
   633         """
   646         # iterate through all available logfiles in date order, as datetimes, from the given date on
   637         # iterate through all available logfiles in date order, as datetimes, from the given date on
   647         for log_date in self._iter_logfile_dates(after, until) :
   638         for log_date in self._iter_logfile_dates(after, until) :
   648             # compare against dt?
   639             # compare against dt?
   649             if dt :
   640             if dt :
   650                 # stat
   641                 # stat
   651                 mtime = self._get_logfile_date(log_date, load=False, mtime=True)
   642                 mtime = self._get_logfile_date(log_date, load=False, mtime=True, ignore_missing=True)
   652                 
   643                 
   653                 # not modified?
   644                 # not modified?
   654                 if mtime < dt :
   645                 if mtime < dt :
   655                     # skip
   646                     # skip
   656                     continue
   647                     continue
   657                 
   648                 
   658             # open
   649             # open
   659             logfile = self._get_logfile_date(log_date, ignore_missing=False)
   650             logfile = self._get_logfile_date(log_date)
   660 
   651 
   661             # yield all lines
   652             # yield all lines
   662             for line in logfile.read_full() :
   653             for line in logfile.read_full() :
   663                 yield line
   654                 yield line
   664 
   655