--- a/scripts/search-index Wed Feb 11 03:05:11 2009 +0200
+++ b/scripts/search-index Wed Feb 11 03:32:21 2009 +0200
@@ -35,11 +35,12 @@
# return
return index, channel
-def _insert_lines (index, options, channel, lines) :
+def _iter_insert_stats (index, channel, lines) :
"""
Insert the given lines into the index.
- Assumes the lines will be in time-order, and prints out as status messages the date and count for the inserted lines
+ Assumes the lines will be in time-order, and yields a series of (date, count) tuples for every date that lines
+ are inserted for
"""
# last date
@@ -50,20 +51,17 @@
# iter lines
for line in lines :
- # output new date header?
- if not options.quiet and (not date or line.timestamp.date() != date) :
- # previous date's line count?
+ # next day?
+ if not date or line.timestamp.date() != date :
if date :
- print "OK: %d lines" % count
-
+ # yield stats
+ yield date, count
+
# reset count
count = 0
# timestamp's date
date = line.timestamp.date()
-
- # status header
- print "%s:" % (date.strftime('%Y-%m-%d'), ),
# insert
index.insert_line(channel, line)
@@ -71,9 +69,22 @@
# count
count += 1
- # final count line
- if not options.quiet and date :
- print "OK: %d lines" % count
+ # final count
+ if date :
+ yield date, count
+
+def _insert_lines (index, options, channel, lines) :
+    """
+        Insert the given lines into the index, reporting progress on stdout.
+
+        Assumes the lines will be in time-order; unless options.quiet is set, prints one "date: count" line per inserted date.
+    """
+
+    # _iter_insert_stats does the inserting and yields (date, count) once each date is complete
+    for date, count in _iter_insert_stats(index, channel, lines) :
+        # one complete status line per date (a trailing comma here would run all dates together without a newline)
+        if not options.quiet :
+            print "%s: %d" % (date.strftime('%Y-%m-%d'), count)
def _load_channel_date (index, options, channel, date) :
"""
@@ -263,18 +274,47 @@
# iterate channels
for channel in channels :
if not options.quiet :
- print "Channel %s:" % channel.id,
+ print "Channel %s:" % channel.id
+
+ # no 'after' by default
+ after = None
# path to our state file
statefile_path = os.path.join(options.autoload_state_path, 'chan-%s' % channel.id)
-
+ statefile_tmppath = statefile_path + '.tmp'
+
+ # do we have a tempfile from a previous crash?
+ # XXX: locking
+ if os.path.exists(statefile_tmppath) :
+ # load after from it
+ statefile_tmp = open(statefile_tmppath, 'r+')
+
+ # read after timestamp
+ after_str = statefile_tmp.read().rstrip()
+
+ if after_str :
+ # XXX: one day backwards
+ after = utils.from_utc_timestamp(int(after_str))
+
+ if not options.quiet :
+ print "\tContinuing earlier progress: after=%s" % after
+
+ else :
+ # ignore
+ if not options.quiet :
+ print "\tIgnoring empty temporary statefile"
+
+ else :
+ # open new tempfile
+ statefile_tmp = open(statefile_tmppath, 'w')
+
# override?
if options.reload :
# load all
mtime = None
if not options.quiet :
- print "reloading all:",
+ print "\tForcing reload!"
# stat for mtime
else :
@@ -282,32 +322,48 @@
mtime = utils.mtime(statefile_path, ignore_missing=True)
if mtime and not options.quiet :
-    print "last load=%s:" % mtime,
+    print "\tLast load time was %s" % mtime
elif not options.quiet :
-    print "no previous load state:",
+    print "\tNo previous load state!"
# only after some specific date?
if options.after :
-    after = options.after
+    # --after is only a fallback: a resume point already read from the tempfile takes precedence
+    if not after :
+        after = options.after
+        if not options.quiet :
+            print "\tUsing after = %s" % after
-    print "after=%s:" % after,
+    elif not options.quiet :
+        print "\tIgnoring --after because we found a tempfile"
-else :
-    after = None
-
# get lines
lines = channel.source.get_modified(mtime, after)
# insert
if not options.quiet :
- print "inserting..."
-
- _insert_lines(index, options, channel, lines)
+ print "\tLoading and inserting..."
+ print
+
+ # iterate insert() per day to display info and update progress
+ for date, count in _iter_insert_stats(index, channel, lines) :
+ # output date header?
+ if not options.quiet :
+ print "\t%10s: %d" % (date.strftime('%Y-%m-%d'), count)
+
+ # write temp state
+ statefile_tmp.seek(0)
+ statefile_tmp.write(str(utils.to_utc_timestamp(datetime.datetime.combine(date, datetime.time(0)))))
+ statefile_tmp.flush()
# write autoload state
open(statefile_path, 'w').close()
+ # close+delete tempfile
+ statefile_tmp.close()
+ os.remove(statefile_tmppath)
+
def cmd_help (options, *args) :
"""
Help about commands