# HG changeset patch # User Tero Marttila # Date 1234315941 -7200 # Node ID 8c6e36849f9a9df810331bcd71dc7b72c66accc2 # Parent 6165f1ba458dc1a767ad41cddab6eaf9c574f09c implement tempfile use for scripts/search-index autoload, so that it can resume aborted sessions diff -r 6165f1ba458d -r 8c6e36849f9a scripts/search-index --- a/scripts/search-index Wed Feb 11 03:05:11 2009 +0200 +++ b/scripts/search-index Wed Feb 11 03:32:21 2009 +0200 @@ -35,11 +35,12 @@ # return return index, channel -def _insert_lines (index, options, channel, lines) : +def _iter_insert_stats (index, channel, lines) : """ Insert the given lines into the index. - Assumes the lines will be in time-order, and prints out as status messages the date and count for the inserted lines + Assumes the lines will be in time-order, and yields a series of (date, count) tuples for every date that lines + are inserted for """ # last date @@ -50,20 +51,17 @@ # iter lines for line in lines : - # output new date header? - if not options.quiet and (not date or line.timestamp.date() != date) : - # previous date's line count? + # next day? + if not date or line.timestamp.date() != date : if date : - print "OK: %d lines" % count - + # yield stats + yield date, count + # reset count count = 0 # timestamp's date date = line.timestamp.date() - - # status header - print "%s:" % (date.strftime('%Y-%m-%d'), ), # insert index.insert_line(channel, line) @@ -71,9 +69,22 @@ # count count += 1 - # final count line - if not options.quiet and date : - print "OK: %d lines" % count + # final count + if date : + yield date, count + +def _insert_lines (index, options, channel, lines) : + """ + Insert the given lines into the index. + + Assumes the lines will be in time-order, and prints out as status messages the date and count for the inserted lines + """ + + # iterate insert stats + for date, count in _iter_insert_stats(index, channel, lines) : + # output date header? + if not options.quiet : + print "%s: %s" % (date.strftime('%Y-%m-%d'), count), def _load_channel_date (index, options, channel, date) : """ @@ -263,18 +274,47 @@ # iterate channels for channel in channels : if not options.quiet : - print "Channel %s:" % channel.id, + print "Channel %s:" % channel.id + + # no 'after' by default + after = None # path to our state file statefile_path = os.path.join(options.autoload_state_path, 'chan-%s' % channel.id) - + statefile_tmppath = statefile_path + '.tmp' + + # do we have a tempfile from a previous crash? + # XXX: locking + if os.path.exists(statefile_tmppath) : + # load after from it + statefile_tmp = open(statefile_tmppath, 'r+') + + # read after timestamp + after_str = statefile_tmp.read().rstrip() + + if after_str : + # XXX: one day backwards + after = utils.from_utc_timestamp(int(after_str)) + + if not options.quiet : + print "\tContinuing earlier progress: after=%s" % after + + else : + # ignore + if not options.quiet : + print "\tIgnoring empty temporary statefile" + + else : + # open new tempfile + statefile_tmp = open(statefile_tmppath, 'w') + # override? if options.reload : # load all mtime = None if not options.quiet : - print "reloading all:", + print "\tForcing reload!" # stat for mtime else : @@ -282,32 +322,48 @@ mtime = utils.mtime(statefile_path, ignore_missing=True) if mtime and not options.quiet : - print "last load=%s:" % mtime, + print "\tLast load time was %s" % mtime elif not options.quiet : - print "no previous load state:", + print "\tN previous load state!" # only after some specific date? if options.after : - after = options.after + # use unless read from tempfile + if not after : + after = options.after + + print "\tUsing after = %s" % after - print "after=%s:" % after, + else : + print "\tIgnoring --after because we found a tempfile" - else : - after = None - # get lines lines = channel.source.get_modified(mtime, after) # insert if not options.quiet : - print "inserting..." - - _insert_lines(index, options, channel, lines) + print "\tLoading and inserting..." + print + + # iterate insert() per day to display info and update progress + for date, count in _iter_insert_stats(index, channel, lines) : + # output date header? + if not options.quiet : + print "\t%10s: %d" % (date.strftime('%Y-%m-%d'), count) + + # write temp state + statefile_tmp.seek(0) + statefile_tmp.write(str(utils.to_utc_timestamp(datetime.datetime.combine(date, datetime.time(0))))) + statefile_tmp.flush() # write autoload state open(statefile_path, 'w').close() + # close+delete tempfile + statefile_tmp.close() + os.remove(statefile_tmppath) + def cmd_help (options, *args) : """ Help about commands