scripts/search-index
changeset 88 0b8e2ba5f76f
parent 83 a34e9f56ddda
child 89 2dc6de43f317
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/search-index	Tue Feb 10 23:59:56 2009 +0200
@@ -0,0 +1,268 @@
+#!/usr/bin/env python2.5
+
+"""
+    Tool for accessing the search index
+"""
+
+# XXX: fix path
+import sys; sys.path.insert(0, '.'); sys.path.insert(0, '..')
+
+import datetime, pytz
+
+# configuration and the LogSearchIndex module
+import config, log_search, channels
+
+def _open_index (options, open_mode) :
+    """
+        Constructs the LogSearchIndex for config.LOG_CHANNELS from options.index_path and the given open_mode
+    """
+    index_path = options.index_path
+    return log_search.LogSearchIndex(config.LOG_CHANNELS, index_path, open_mode)
+
+
+def _open_index_and_channel (options, channel_name, open_mode) :
+    """
+        Open the search index and resolve the named channel.
+
+        The index is opened via _open_index() with the given open_mode, and channel_name is
+        looked up in config.LOG_CHANNELS.
+
+        Returns an (index, channel) tuple.
+    """
+
+    search_index = _open_index(options, open_mode)
+
+    return search_index, config.LOG_CHANNELS.lookup(channel_name)
+
+def _load_channel_date (index, options, channel, date) :
+    """
+        Fetches the lines for the given date from channel.source and inserts them into the index.
+
+        Progress is printed unless options.quiet is set. If fetching the lines fails, the date is
+        skipped when options.skip_missing is set, and the error is re-raised otherwise.
+    """
+    verbose = not options.quiet
+    if verbose :
+        # trailing comma: the outcome printed below continues on this same line
+        print "%s %s..." % (channel.id, date.strftime(channel.source.filename_fmt)),
+
+    try :
+        day_lines = channel.source.get_date(date)
+
+    except Exception, err :
+        if not options.skip_missing :
+            raise
+        if verbose :
+            print "Skipped: %s" % (err, )
+        return
+
+    inserted = index.insert(channel, day_lines)
+    if verbose :
+        print "OK: %d lines" % inserted
+
+class CommandError (Exception) :
+    """
+        Raised for invalid command-line usage.
+
+        The script entry point catches this, prints the message and exits with status 1.
+    """
+
+def cmd_create (options) :
+    """
+        Creates a new index
+    """
+
+    # --force selects the '*' open mode instead of the plain create mode 'c'
+    mode = '*' if options.force else 'c'
+
+    # opening the index in this mode is the whole operation; the handle itself is not used
+    _open_index(options, mode)
+
+def cmd_load (options, channel_name, *dates) :
+    """
+        Loads the logs for a specific channel for the given dates (in terms of the channel logs' timezone) into the index
+    """
+
+    # open mode: 'c' when --create was given, otherwise 'a'
+    index, channel = _open_index_and_channel(options, channel_name, 'c' if options.create else 'a')
+    tz = channel.source.tz
+
+    for date_name in dates :
+        try :
+            # only parsing the user-supplied string is expected to fail
+            date = datetime.datetime.strptime(date_name, '%Y-%m-%d')
+
+        except ValueError, e :
+            print "[ERROR] Invalid date: %s: %s" % (date_name, e)
+            if options.skip_missing :
+                continue
+            raise
+
+        # pytz documents that its zones must be attached with localize(); replace(tzinfo=...) can
+        # give a wrong (historical) UTC offset. tzinfo classes without localize() use replace().
+        date = tz.localize(date) if hasattr(tz, 'localize') else date.replace(tzinfo=tz)
+
+        _load_channel_date(index, options, channel, date)
+
+def cmd_load_month (options, channel_name, *months) :
+    """
+        Loads the logs for a specific channel for the given months (in terms of the channel's timezone) into the index
+    """
+
+    # open mode: 'c' when --create was given, otherwise 'a'
+    index, channel = _open_index_and_channel(options, channel_name, 'c' if options.create else 'a')
+    tz = channel.source.tz
+
+    def _attach_tz (naive) :
+        # pytz documents that its zones must be attached with localize(); replace(tzinfo=...) can
+        # give a wrong (historical) UTC offset. tzinfo classes without localize() use replace().
+        if hasattr(tz, 'localize') :
+            return tz.localize(naive)
+        return naive.replace(tzinfo=tz)
+
+    for month_name in months :
+        try :
+            # only parsing the user-supplied string is expected to fail
+            month = datetime.datetime.strptime(month_name, '%Y-%m')
+        except ValueError, e :
+            print "[ERROR] Invalid date: %s: %s" % (month_name, e)
+            if options.skip_missing :
+                continue
+            raise
+
+        # materialised as a list so that the number of days can be reported up front
+        days = list(channel.source.get_month_days(_attach_tz(month)))
+        # this is a status message, so it honours --quiet like _load_channel_date's output does
+        if not options.quiet :
+            print "Loading %d days of logs:" % (len(days))
+        for date in days :
+            # each day is loaded as midnight of that date in the channel source's timezone
+            midnight = datetime.datetime.combine(date, datetime.time(0))
+            _load_channel_date(index, options, channel, _attach_tz(midnight))
+
+def cmd_search (options, channel_name, query) :
+    """
+        Search the index for events on a specific channel with the given query
+    """
+
+    # the flag is stored as options.create (see --create in main); combining it with a search
+    # is a usage error, reported as CommandError so the entry point prints it cleanly
+    if options.create :
+        raise CommandError("--create doesn't make sense for 'search'")
+
+    # open the index in mode 'r' and resolve the channel to search on
+    index, channel = _open_index_and_channel(options, channel_name, 'r')
+
+    # run the query, then render the matches as plain text via the selected formatter
+    lines = index.search_simple(channel, query)
+
+    for line in options.formatter.format_txt(lines) :
+        print line
+
+def cmd_help (options, *args) :
+    """
+        Help about commands
+    """
+
+    import inspect
+
+    def _is_command (name, obj) :
+        # commands are the module-level functions named cmd_*
+        return name.startswith('cmd_') and inspect.isfunction(obj)
+
+    # general help stuff
+    options._parser.print_help()
+
+    if args :
+        # help for the named commands only; an unknown name is a usage error
+        heading = "Command help:"
+        cmd_objects = []
+
+        for command in args :
+            cmd_func_name = 'cmd_%s' % command
+            cmd_func = globals().get(cmd_func_name)
+
+            if not _is_command(cmd_func_name, cmd_func) :
+                raise CommandError("Unknown command: %s" % command)
+
+            cmd_objects.append((cmd_func_name, cmd_func))
+
+    else :
+        # help for every command, sorted alphabetically by name
+        heading = "Available commands:"
+        cmd_objects = sorted(item for item in globals().iteritems() if _is_command(*item))
+
+    print
+    print heading
+
+    for cmd_func_name, cmd_func in cmd_objects :
+        cmd_args, cmd_varargs, cmd_varkw, cmd_default = inspect.getargspec(cmd_func)
+        cmd_doc = inspect.getdoc(cmd_func)
+
+        # drop the cmd_ prefix from the name, and the leading "options" arg that main() supplies;
+        # the command's docstring doubles as its help text
+        print "\t%10s %-30s : %s" % (cmd_func_name[4:], inspect.formatargspec(cmd_args[1:], cmd_varargs, None, cmd_default), cmd_doc)
+
+def main (argv) :
+    """
+        Command-line main, with given argv
+
+        argv[0] is skipped; the rest is parsed as options, then a command name and its arguments.
+        The command is dispatched to the module-level cmd_<name> function as func(options, *args).
+        Raises CommandError for a missing or unknown command, and for an unknown --timezone name.
+    """
+
+    from optparse import OptionParser
+
+    # -h/--help is defined by hand below and routed to cmd_help, so optparse's own is disabled
+    parser = OptionParser(
+        usage           = "%prog [options] <command> [ ... ]",
+        add_help_option = False,
+    )
+
+    parser.add_option('-h', "--help",           dest="help",            help="Show this help message and exit",     action="store_true")
+    parser.add_option('-F', "--formatter",      dest="formatter_name",  help="LogFormatter to use",                 metavar="FMT",  type="choice", default="irssi",
+        choices=list(config.LOG_FORMATTERS.iterkeys()))
+    parser.add_option('-I', "--index",          dest="index_path",      help="Index database path",                 metavar="PATH", default="logs/index")
+    parser.add_option('-Z', "--timezone",       dest="tz_name",         help="Timezone for output",                 metavar="TZ",   default="UTC")
+    parser.add_option('-f', "--force",          dest="force",           help="Force dangerous operation",           action="store_true")
+    parser.add_option(      "--create",         dest="create",          help="Create index database",               action="store_true")
+    parser.add_option(      "--skip-missing",   dest="skip_missing",    help="Skip missing logfiles",               action="store_true")
+    parser.add_option(      "--quiet",          dest="quiet",           help="Supress status messages",             action="store_true")
+
+    options, args = parser.parse_args(argv[1:])
+
+    # postprocess: cmd_help needs the parser, the formatter needs the output timezone
+    options._parser = parser
+    try :
+        options.tz = pytz.timezone(options.tz_name)
+    except pytz.UnknownTimeZoneError :
+        # a bad --timezone is a usage error, not a reason for an uncaught traceback
+        raise CommandError("Unknown timezone: %s" % options.tz_name)
+
+    options.formatter = config.LOG_FORMATTERS[options.formatter_name](options.tz, "%H:%M:%S", None, None)
+
+    # special-case --help
+    if options.help :
+        return cmd_help(options, *args)
+
+    if not args :
+        raise CommandError("Missing command")
+
+    # the first positional argument names the command, the remainder is passed on to it
+    command = args.pop(0)
+    func = globals().get('cmd_%s' % command)
+    if not func :
+        raise CommandError("Unknown command: %s" % command)
+
+    func(options, *args)
+
+if __name__ == '__main__' :
+    # exit status 0 on success, 1 when main() rejects the command line with a CommandError
+    try :
+        main(sys.argv)
+    except CommandError, problem :
+        print problem
+        sys.exit(1)
+    sys.exit(0)
+