scripts/search-index
changeset 93 48fca00689e3
parent 89 2dc6de43f317
child 94 6673de9bc911
--- a/scripts/search-index	Wed Feb 11 01:21:22 2009 +0200
+++ b/scripts/search-index	Wed Feb 11 02:07:07 2009 +0200
@@ -7,10 +7,11 @@
 # XXX: fix path
 import sys; sys.path.insert(0, '.'); sys.path.insert(0, '..')
 
+import os, os.path
 import datetime, pytz
 
 # configuration and the LogSearchIndex module
-import config, log_search, channels
+import config, utils, log_search, channels
 
 def _open_index (options, open_mode) :
     """
@@ -34,13 +35,53 @@
     # return
     return index, channel
 
+def _insert_lines (index, options, channel, lines) :
+    """
+        Insert each of the given lines into the index, one index.insert_line(channel, line) call per line.
+
+        Unless options.quiet is set, prints a "YYYY-MM-DD: OK: N lines" status row for every run of lines sharing a date; the lines are assumed to arrive in time-order. Returns nothing.
+    """
+
+    # date of the status row currently open; None until the first line is seen (and stays None when quiet)
+    date = None
+
+    # number of lines inserted since the current status row was opened (never reset when quiet)
+    count = 0
+
+    # insert every line, emitting status output whenever the date changes
+    for line in lines :
+        # not quiet, and this is the first line or its date differs from the open status row?
+        if not options.quiet and (not date or line.timestamp.date() != date) :
+            # close the previous date's status row with its line count
+            if date :
+                print "OK: %d lines" % count
+            
+            # start counting afresh for the new date
+            count = 0
+
+            # remember the new date, taken from the line's timestamp
+            date = line.timestamp.date()
+            
+            # open the new status row; the trailing comma keeps the cursor on this row for the count
+            print "%s:" % (date.strftime('%Y-%m-%d'), ),
+
+        # hand the line over to the index
+        index.insert_line(channel, line)
+
+        # one more line inserted under the current date
+        count += 1
+    
+    # close the last status row; date is only ever set when not quiet and at least one line was seen
+    if not options.quiet and date :
+        print "OK: %d lines" % count
+
 def _load_channel_date (index, options, channel, date) :
     """
         Loads the logs for the given date from the channel's LogSource into the given LogSearchIndex
     """
 
     if not options.quiet :
-        print "%s %s..." % (channel.id, date.strftime(channel.source.filename_fmt)),
+        print "Loading date for channel %s" % channel.id
         
     try :
         # load lines for date
@@ -51,15 +92,11 @@
             raise
             
         if not options.quiet :
-            print "Skipped: %s" % (e, )
+            print "\tSkipped: %s" % (e, )
     
     else :
-        # insert -> count
-        count = index.insert(channel, lines)
-
-        if not options.quiet :
-            print "OK: %d lines" % count
-
+        # insert
+        _insert_lines(index, options, channel, lines)
 
 def _parse_date (options, date_str, tz=None, fmt='%Y-%m-%d') :
     """
@@ -208,6 +245,65 @@
         # display
         _output_lines(options, lines)
 
+def cmd_autoload (options, *channel_names) :
+    """
+        For each named channel (all of config.LOG_CHANNELS when none are named), insert the lines returned by channel.source.get_modified() into the index, using the mtime of a per-channel state file as the time of the previous load.
+    """
+    
+    # open the index with mode 'c' when options.create is set, otherwise with mode 'a'
+    index = _open_index(options, 'c' if options.create else 'a')
+
+    # no names given -> every configured channel; NOTE(review): this local shadows the imported `channels` module
+    if not channel_names :
+        channels = config.LOG_CHANNELS
+    
+    else :
+        channels = [config.LOG_CHANNELS.lookup(channel_name) for channel_name in channel_names]
+
+    # handle each channel in turn, each with its own state file
+    for channel in channels :
+        if not options.quiet :
+            print "Channel %s:" % channel.id,
+
+        # per-channel state file 'chan-<id>' inside options.autoload_state_path; its mtime records the previous load
+        statefile_path = os.path.join(options.autoload_state_path, 'chan-%s' % channel.id)
+        
+        # options.reload set: ignore any saved state
+        if options.reload :
+            # None is passed on to get_modified() - presumably meaning "no cutoff"; confirm against the LogSource
+            mtime = None
+
+            if not options.quiet :
+                print "reloading all:",
+
+        # otherwise use the state file's mtime, if the file exists
+        # XXX: replace with single utils.mtime()
+        elif os.path.exists(statefile_path) :
+            # convert the state file's st_mtime via utils.from_utc_timestamp (presumably to a UTC datetime - confirm)
+            mtime = utils.from_utc_timestamp(os.stat(statefile_path).st_mtime)
+            
+            if not options.quiet :
+                print "last load=%s:" % mtime,
+
+        else :
+            # no state file yet, so there is no previous load time to compare against
+            mtime = None
+            
+            if not options.quiet :
+                print "no previous load state:",
+        
+        # ask the channel's source for the lines to load, given the previous load time (or None)
+        lines = channel.source.get_modified(mtime)
+        
+        # finish the status row, then insert; _insert_lines prints the per-date progress
+        if not options.quiet :
+            print "inserting..."
+        
+        _insert_lines(index, options, channel, lines)
+
+        # create/truncate the state file so its mtime marks this load; NOTE(review): done after the insert, so changes made during the load may be skipped next run - confirm
+        open(statefile_path, 'w').close()
+
 def cmd_help (options, *args) :
     """
         Help about commands
@@ -271,10 +367,12 @@
         choices=[fmt_name for fmt_name in config.LOG_FORMATTERS.iterkeys()])
 
     parser.add_option('-I', "--index",          dest="index_path",      help="Index database path",                 metavar="PATH", default="logs/index")
+    parser.add_option(      "--autoload-state", dest="autoload_state_path", help="Path to autoload state dir",      metavar="PATH", default="logs/autoload-state")
     parser.add_option('-Z', "--timezone",       dest="tz_name",         help="Timezone for output",                 metavar="TZ",   default="UTC")
     parser.add_option('-f', "--force",          dest="force",           help="Force dangerous operation",           action="store_true")
     parser.add_option(      "--create",         dest="create",          help="Create index database",               action="store_true")
     parser.add_option(      "--skip-missing",   dest="skip_missing",    help="Skip missing logfiles",               action="store_true")
+    parser.add_option(      "--reload",         dest="reload",          help="Force reload lines",                  action="store_true")
     parser.add_option(      "--quiet",          dest="quiet",           help="Supress status messages",             action="store_true")
 
     # parse