implement tempfile use for scripts/search-index autoload, so that it can resume aborted sessions
author: Tero Marttila <terom@fixme.fi>
Wed, 11 Feb 2009 03:32:21 +0200
changeset 98 8c6e36849f9a
parent 97 6165f1ba458d
child 99 8719ac564b22
implement tempfile use for scripts/search-index autoload, so that it can resume aborted sessions
scripts/search-index
--- a/scripts/search-index	Wed Feb 11 03:05:11 2009 +0200
+++ b/scripts/search-index	Wed Feb 11 03:32:21 2009 +0200
@@ -35,11 +35,12 @@
     # return
     return index, channel
 
-def _insert_lines (index, options, channel, lines) :
+def _iter_insert_stats (index, channel, lines) :
     """
         Insert the given lines into the index.
 
-        Assumes the lines will be in time-order, and prints out as status messages the date and count for the inserted lines
+        Assumes the lines will be in time-order, and yields a series of (date, count) tuples for every date that lines
+        are inserted for
     """
 
     # last date
@@ -50,20 +51,17 @@
 
     # iter lines
     for line in lines :
-        # output new date header?
-        if not options.quiet and (not date or line.timestamp.date() != date) :
-            # previous date's line count?
+        # next day?
+        if not date or line.timestamp.date() != date :
             if date :
-                print "OK: %d lines" % count
-            
+                # yield stats
+                yield date, count
+
             # reset count
             count = 0
 
             # timestamp's date
             date = line.timestamp.date()
-            
-            # status header
-            print "%s:" % (date.strftime('%Y-%m-%d'), ),
 
         # insert
         index.insert_line(channel, line)
@@ -71,9 +69,22 @@
         # count
         count += 1
     
-    # final count line
-    if not options.quiet and date :
-        print "OK: %d lines" % count
+    # final count
+    if date :
+        yield date, count
+
+def _insert_lines (index, options, channel, lines) :
+    """
+        Insert the given lines into the index.
+
+        Assumes the lines will be in time-order, and prints out as status messages the date and count for the inserted lines
+    """
+    
+    # iterate insert stats
+    for date, count in _iter_insert_stats(index, channel, lines) :
+        # output date header?
+        if not options.quiet :
+            print "%s: %s" % (date.strftime('%Y-%m-%d'), count),
 
 def _load_channel_date (index, options, channel, date) :
     """
@@ -263,18 +274,47 @@
     # iterate channels
     for channel in channels :
         if not options.quiet :
-            print "Channel %s:" % channel.id,
+            print "Channel %s:" % channel.id
+
+        # no 'after' by default
+        after = None
 
         # path to our state file
         statefile_path = os.path.join(options.autoload_state_path, 'chan-%s' % channel.id)
-       
+        statefile_tmppath = statefile_path + '.tmp'
+        
+        # do we have a tempfile from a previous crash?
+        # XXX: locking
+        if os.path.exists(statefile_tmppath) :
+            # load after from it
+            statefile_tmp = open(statefile_tmppath, 'r+')
+            
+            # read after timestamp
+            after_str = statefile_tmp.read().rstrip()
+
+            if after_str :
+                # XXX: one day backwards
+                after = utils.from_utc_timestamp(int(after_str))
+
+                if not options.quiet :
+                    print "\tContinuing earlier progress: after=%s" % after
+
+            else :
+                # ignore
+                if not options.quiet :
+                    print "\tIgnoring empty temporary statefile"
+
+        else :
+            # open new tempfile
+            statefile_tmp = open(statefile_tmppath, 'w')
+
         # override?
         if options.reload :
             # load all
             mtime = None
 
             if not options.quiet :
-                print "reloading all:",
+                print "\tForcing reload!"
 
         # stat for mtime
         else :
@@ -282,32 +322,48 @@
             mtime = utils.mtime(statefile_path, ignore_missing=True)
 
             if mtime and not options.quiet :
-                print "last load=%s:" % mtime,
+                print "\tLast load time was %s" % mtime
 
             elif not options.quiet :
-                print "no previous load state:",
+                print "\tN previous load state!"
  
         # only after some specific date?
         if options.after :
-            after = options.after
+            # use unless read from tempfile
+            if not after :
+                after = options.after
+                
+                print "\tUsing after = %s" % after
             
-            print "after=%s:" % after,
+            else :
+                print "\tIgnoring --after because we found a tempfile"
 
-        else :
-            after = None
-        
         # get lines
         lines = channel.source.get_modified(mtime, after)
         
         # insert
         if not options.quiet :
-            print "inserting..."
-        
-        _insert_lines(index, options, channel, lines)
+            print "\tLoading and inserting..."
+            print
+     
+        # iterate insert() per day to display info and update progress
+        for date, count in _iter_insert_stats(index, channel, lines) :
+            # output date header?
+            if not options.quiet :
+                print "\t%10s: %d" % (date.strftime('%Y-%m-%d'), count)
+            
+            # write temp state
+            statefile_tmp.seek(0)
+            statefile_tmp.write(str(utils.to_utc_timestamp(datetime.datetime.combine(date, datetime.time(0)))))
+            statefile_tmp.flush()
 
         # write autoload state
         open(statefile_path, 'w').close()
 
+        # close+delete tempfile
+        statefile_tmp.close()
+        os.remove(statefile_tmppath)
+
 def cmd_help (options, *args) :
     """
         Help about commands