scripts/search-index
changeset 98 8c6e36849f9a
parent 95 ebdbda3dd5d0
child 99 8719ac564b22
equal deleted inserted replaced
97:6165f1ba458d 98:8c6e36849f9a
    33     channel = config.LOG_CHANNELS.lookup(channel_name)
    33     channel = config.LOG_CHANNELS.lookup(channel_name)
    34     
    34     
    35     # return
    35     # return
    36     return index, channel
    36     return index, channel
    37 
    37 
    38 def _insert_lines (index, options, channel, lines) :
    38 def _iter_insert_stats (index, channel, lines) :
    39     """
    39     """
    40         Insert the given lines into the index.
    40         Insert the given lines into the index.
    41 
    41 
    42         Assumes the lines will be in time-order, and prints out as status messages the date and count for the inserted lines
    42         Assumes the lines will be in time-order, and yields a series of (date, count) tuples for every date that lines
       
    43         are inserted for
    43     """
    44     """
    44 
    45 
    45     # last date
    46     # last date
    46     date = None
    47     date = None
    47 
    48 
    48     # count
    49     # count
    49     count = 0
    50     count = 0
    50 
    51 
    51     # iter lines
    52     # iter lines
    52     for line in lines :
    53     for line in lines :
    53         # output new date header?
    54         # next day?
    54         if not options.quiet and (not date or line.timestamp.date() != date) :
    55         if not date or line.timestamp.date() != date :
    55             # previous date's line count?
       
    56             if date :
    56             if date :
    57                 print "OK: %d lines" % count
    57                 # yield stats
    58             
    58                 yield date, count
       
    59 
    59             # reset count
    60             # reset count
    60             count = 0
    61             count = 0
    61 
    62 
    62             # timestamp's date
    63             # timestamp's date
    63             date = line.timestamp.date()
    64             date = line.timestamp.date()
    64             
       
    65             # status header
       
    66             print "%s:" % (date.strftime('%Y-%m-%d'), ),
       
    67 
    65 
    68         # insert
    66         # insert
    69         index.insert_line(channel, line)
    67         index.insert_line(channel, line)
    70 
    68 
    71         # count
    69         # count
    72         count += 1
    70         count += 1
    73     
    71     
    74     # final count line
    72     # final count
    75     if not options.quiet and date :
    73     if date :
    76         print "OK: %d lines" % count
    74         yield date, count
       
    75 
       
    76 def _insert_lines (index, options, channel, lines) :
       
    77     """
       
    78         Insert the given lines into the index.
       
    79 
       
    80         Assumes the lines will be in time-order, and prints out as status messages the date and count for the inserted lines
       
    81     """
       
    82     
       
    83     # iterate insert stats
       
    84     for date, count in _iter_insert_stats(index, channel, lines) :
       
    85         # output date header?
       
    86         if not options.quiet :
       
    87             print "%s: %s" % (date.strftime('%Y-%m-%d'), count),
    77 
    88 
    78 def _load_channel_date (index, options, channel, date) :
    89 def _load_channel_date (index, options, channel, date) :
    79     """
    90     """
    80         Loads the logs for the given date from the channel's LogSource into the given LogSearchIndex
    91         Loads the logs for the given date from the channel's LogSource into the given LogSearchIndex
    81     """
    92     """
   261         channels = [config.LOG_CHANNELS.lookup(channel_name) for channel_name in channel_names]
   272         channels = [config.LOG_CHANNELS.lookup(channel_name) for channel_name in channel_names]
   262 
   273 
   263     # iterate channels
   274     # iterate channels
   264     for channel in channels :
   275     for channel in channels :
   265         if not options.quiet :
   276         if not options.quiet :
   266             print "Channel %s:" % channel.id,
   277             print "Channel %s:" % channel.id
       
   278 
       
   279         # no 'after' by default
       
   280         after = None
   267 
   281 
   268         # path to our state file
   282         # path to our state file
   269         statefile_path = os.path.join(options.autoload_state_path, 'chan-%s' % channel.id)
   283         statefile_path = os.path.join(options.autoload_state_path, 'chan-%s' % channel.id)
   270        
   284         statefile_tmppath = statefile_path + '.tmp'
       
   285         
       
   286         # do we have a tempfile from a previous crash?
       
   287         # XXX: locking
       
   288         if os.path.exists(statefile_tmppath) :
       
   289             # load after from it
       
   290             statefile_tmp = open(statefile_tmppath, 'r+')
       
   291             
       
   292             # read after timestamp
       
   293             after_str = statefile_tmp.read().rstrip()
       
   294 
       
   295             if after_str :
       
   296                 # XXX: one day backwards
       
   297                 after = utils.from_utc_timestamp(int(after_str))
       
   298 
       
   299                 if not options.quiet :
       
   300                     print "\tContinuing earlier progress: after=%s" % after
       
   301 
       
   302             else :
       
   303                 # ignore
       
   304                 if not options.quiet :
       
   305                     print "\tIgnoring empty temporary statefile"
       
   306 
       
   307         else :
       
   308             # open new tempfile
       
   309             statefile_tmp = open(statefile_tmppath, 'w')
       
   310 
   271         # override?
   311         # override?
   272         if options.reload :
   312         if options.reload :
   273             # load all
   313             # load all
   274             mtime = None
   314             mtime = None
   275 
   315 
   276             if not options.quiet :
   316             if not options.quiet :
   277                 print "reloading all:",
   317                 print "\tForcing reload!"
   278 
   318 
   279         # stat for mtime
   319         # stat for mtime
   280         else :
   320         else :
   281             # stat for mtime, None if unknown
   321             # stat for mtime, None if unknown
   282             mtime = utils.mtime(statefile_path, ignore_missing=True)
   322             mtime = utils.mtime(statefile_path, ignore_missing=True)
   283 
   323 
   284             if mtime and not options.quiet :
   324             if mtime and not options.quiet :
   285                 print "last load=%s:" % mtime,
   325                 print "\tLast load time was %s" % mtime
   286 
   326 
   287             elif not options.quiet :
   327             elif not options.quiet :
   288                 print "no previous load state:",
   328                 print "\tN previous load state!"
   289  
   329  
   290         # only after some specific date?
   330         # only after some specific date?
   291         if options.after :
   331         if options.after :
   292             after = options.after
   332             # use unless read from tempfile
       
   333             if not after :
       
   334                 after = options.after
       
   335                 
       
   336                 print "\tUsing after = %s" % after
   293             
   337             
   294             print "after=%s:" % after,
   338             else :
   295 
   339                 print "\tIgnoring --after because we found a tempfile"
   296         else :
   340 
   297             after = None
       
   298         
       
   299         # get lines
   341         # get lines
   300         lines = channel.source.get_modified(mtime, after)
   342         lines = channel.source.get_modified(mtime, after)
   301         
   343         
   302         # insert
   344         # insert
   303         if not options.quiet :
   345         if not options.quiet :
   304             print "inserting..."
   346             print "\tLoading and inserting..."
   305         
   347             print
   306         _insert_lines(index, options, channel, lines)
   348      
       
   349         # iterate insert() per day to display info and update progress
       
   350         for date, count in _iter_insert_stats(index, channel, lines) :
       
   351             # output date header?
       
   352             if not options.quiet :
       
   353                 print "\t%10s: %d" % (date.strftime('%Y-%m-%d'), count)
       
   354             
       
   355             # write temp state
       
   356             statefile_tmp.seek(0)
       
   357             statefile_tmp.write(str(utils.to_utc_timestamp(datetime.datetime.combine(date, datetime.time(0)))))
       
   358             statefile_tmp.flush()
   307 
   359 
   308         # write autoload state
   360         # write autoload state
   309         open(statefile_path, 'w').close()
   361         open(statefile_path, 'w').close()
       
   362 
       
   363         # close+delete tempfile
       
   364         statefile_tmp.close()
       
   365         os.remove(statefile_tmppath)
   310 
   366 
   311 def cmd_help (options, *args) :
   367 def cmd_help (options, *args) :
   312     """
   368     """
   313         Help about commands
   369         Help about commands
   314     """
   370     """