scripts/search-index.py
changeset 82 afd3120ec71e
parent 68 8157c41b3236
child 83 a34e9f56ddda
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/search-index.py	Tue Feb 10 04:27:22 2009 +0200
@@ -0,0 +1,169 @@
+"""
+    Tool for accessing the search index
+"""
+
+# XXX: fix path
+import sys; sys.path.insert(0, '.'); sys.path.insert(0, '..')
+
+import datetime, pytz
+
+# configuration and the LogSearchIndex module
+import config, log_search, channels
+
+def _open_index_and_channel (options, channel_name, open_mode) :
+    """
+        Opens+returns a LogSearchIndex and a LogChannel
+    """
+    # open the LogSearchIndex
+    index = log_search.LogSearchIndex(options.index_path, open_mode)
+
+    # open the channel
+    channel = config.LOG_CHANNELS.lookup(channel_name)
+    
+    # return
+    return index, channel
+
+def _load_channel_date (index, options, channel, date) :
+    """
+        Loads the logs for the given date from the channel's LogSource into the given LogSearchIndex
+    """
+
+    if not options.quiet :
+        print "%s %s..." % (channel.id, date.strftime(channel.source.filename_fmt)),
+        
+    try :
+        # load lines for date
+        lines = channel.source.get_date(date)
+    
+    except Exception, e :
+        if not options.skip_missing :
+            raise
+            
+        if not options.quiet :
+            print "Skipped: %s" % (e, )
+    
+    else :
+        # insert -> count
+        count = index.insert(channel, lines)
+
+        if not options.quiet :
+            print "OK: %d lines" % count
+
+def cmd_load (options, channel_name, *dates) :
+    """
+        Loads the logs for a specific channel for the given dates (in terms of the channe logs' timezone) into the index
+    """
+
+    # open index/channel
+    index, channel = _open_index_and_channel(options, channel_name, '*' if options.create_index else 'a')
+    
+    # handle each date
+    for date_name in dates :
+        try :
+            # parse date
+            date = datetime.datetime.strptime(date_name, '%Y-%m-%d').replace(tzinfo=channel.source.tz)
+
+        except Exception, e :
+            print "[ERROR] Invalid date: %s: %s" % (date_name, e)
+
+            if options.skip_missing :
+                continue
+
+            else :
+                raise
+        
+        # load
+        _load_channel_date(index, options, channel, date)
+
+def cmd_load_month (options, channel_name, *months) :
+    """
+        Loads the logs for a specific channel for the given months (in terms of the channel's timezone) into the index
+    """
+
+    # open index/channel
+    index, channel = _open_index_and_channel(options, channel_name, '*' if options.create_index else 'a')
+    
+    # handle each date
+    for month_name in months :
+        try :
+            # parse date
+            month = datetime.datetime.strptime(month_name, '%Y-%m').replace(tzinfo=channel.source.tz)
+
+        except Exception, e :
+            print "[ERROR] Invalid date: %s: %s" % (month_name, e)
+
+            if options.skip_missing :
+                continue
+
+            else :
+                raise
+        
+        # get the set of days
+        days = channel.source.get_month_days(month)
+
+        print "Loading %d days of logs:" % (len(days))
+
+        # load each day
+        for date in days :
+            # convert to datetime
+            dt = datetime.datetime.combine(date, datetime.time(0)).replace(tzinfo=channel.source.tz)
+            
+            # load
+            _load_channel_date(index, options, channel, dt)
+
+def cmd_search (options, channel_name, query) :
+    """
+        Search the index for events on a specific channel with the given query
+    """
+    
+    # sanity-check
+    if options.create_index :
+        raise Exception("--create doesn't make sense for 'search'")
+    
+    # open index/channel
+    index, channel = _open_index_and_channel(options, channel_name, 'r')
+    
+    # search
+    lines = index.search_simple(channel, query)
+    
+    # display as plaintext
+    for line in options.formatter.format_txt(lines) :
+        print line
+
+if __name__ == '__main__' :
+    from optparse import OptionParser
+    
+    # define parser
+    parser = OptionParser(
+        usage           = "%prog [options] <command> [ ... ]",
+        add_help_option = True,
+    )
+
+    # define command-line arguments
+    parser.add_option("-I", "--index", dest="index_path", help="Index database path", metavar="PATH", default="logs/index")
+    parser.add_option("--create", dest="create_index", action="store_true", help="Create index database")
+    parser.add_option("-f", "--formatter", dest="formatter_name", help="LogFormatter to use", default="irssi")
+    parser.add_option("-z", "--timezone", dest="tz_name", help="Timezone for output", metavar="TZ", default="UTC")
+    parser.add_option("--skip-missing", dest="skip_missing", action="store_true", help="Skip missing logfiles")
+    parser.add_option("--quiet", dest="quiet", action="store_true", help="Supress status messages")
+
+    # parse
+    options, args = parser.parse_args()
+
+    # postprocess stuff
+    options.tz = pytz.timezone(options.tz_name)
+    options.formatter = config.LOG_FORMATTERS[options.formatter_name](options.tz, "%H:%M:%S", None, None)
+    
+    # pop command
+    if not args :
+        raise Exception("Missing command")
+
+    command = args.pop(0)
+
+    # inspect
+    func = globals()['cmd_%s' % command]
+    
+    # call
+    func(options, *args)
+
+