scripts/search-index
author Tero Marttila <terom@fixme.fi>
Wed, 11 Feb 2009 02:21:43 +0200
changeset 95 ebdbda3dd5d0
parent 94 6673de9bc911
child 98 8c6e36849f9a
permissions -rwxr-xr-x
implement utils.mtime
#!/usr/bin/env python2.5

"""
    Tool for accessing the search index
"""

# XXX: fix path
import sys; sys.path.insert(0, '.'); sys.path.insert(0, '..')

import os, os.path
import datetime, pytz

# configuration and the LogSearchIndex module
import config, utils, log_search, channels

def _open_index (options, open_mode) :
    """
        Build the LogSearchIndex for the configured channels.

        The index is located at options.index_path, and open_mode is handed to LogSearchIndex unchanged.
    """

    channel_set = config.LOG_CHANNELS
    index_path = options.index_path

    return log_search.LogSearchIndex(channel_set, index_path, open_mode)


def _open_index_and_channel (options, channel_name, open_mode) :
    """
        Open the search index and resolve a channel by name.

        Returns an (index, channel) tuple. The index is opened first, then channel_name is looked up in
        config.LOG_CHANNELS.
    """

    opened_index = _open_index(options, open_mode)

    return opened_index, config.LOG_CHANNELS.lookup(channel_name)

def _insert_lines (index, options, channel, lines) :
    """
        Insert the given lines into the index.

        Assumes the lines will be in time-order, and prints out as status messages the date and count for the inserted lines
    """

    # last date
    date = None

    # count
    count = 0

    # iter lines
    for line in lines :
        # output new date header?
        if not options.quiet and (not date or line.timestamp.date() != date) :
            # previous date's line count?
            if date :
                print "OK: %d lines" % count
            
            # reset count
            count = 0

            # timestamp's date
            date = line.timestamp.date()
            
            # status header
            print "%s:" % (date.strftime('%Y-%m-%d'), ),

        # insert
        index.insert_line(channel, line)

        # count
        count += 1
    
    # final count line
    if not options.quiet and date :
        print "OK: %d lines" % count

def _load_channel_date (index, options, channel, date) :
    """
        Loads the logs for the given date from the channel's LogSource into the given LogSearchIndex
    """

    if not options.quiet :
        print "Loading date for channel %s" % channel.id
        
    try :
        # load lines for date
        lines = channel.source.get_date(date)
    
    except Exception, e :
        if not options.skip_missing :
            raise
            
        if not options.quiet :
            print "\tSkipped: %s" % (e, )
    
    else :
        # insert
        _insert_lines(index, options, channel, lines)

def _parse_date (options, date_str, tz=None, fmt='%Y-%m-%d') :
    """
        Parse date_str using the given strptime format, and return it as a datetime in the given timezone.

        tz defaults to options.tz when not given. Raises CommandError if date_str cannot be parsed with fmt.
    """

    # default tz
    if not tz :
        tz = options.tz

    try :
        # parse into a naive datetime first
        naive = datetime.datetime.strptime(date_str, fmt)

    except (ValueError, TypeError) :
        # sys.exc_info() rather than binding the exception in the except clause, so that this function parses
        # on both Python 2.5 and newer interpreters
        raise CommandError("[ERROR] Invalid date: %s: %s" % (date_str, sys.exc_info()[1]))

    # pytz timezones must be attached with localize(): replace(tzinfo=...) would pick the zone's first
    # (usually local-mean-time) offset instead of the one actually in effect on that date
    if hasattr(tz, 'localize') :
        return tz.localize(naive)

    # plain tzinfo objects have a fixed meaning and can be attached directly
    return naive.replace(tzinfo=tz)

def _output_lines (options, lines) :
    """
        Display the formatted LogLines
    """

    # display as plaintext
    for line, txt_data in options.formatter.format_txt(lines, full_timestamps=True) :
        print txt_data

class CommandError (Exception) :
    """
        Raised for problems with the command-line arguments.

        The script's entry point prints the message of a CommandError and exits with status 1.
    """

def cmd_create (options) :
    """
        Creates a new index
    """

    # NOTE: the docstring above doubles as this command's description in the cmd_help listing

    # --force selects the '*' open mode instead of the plain 'c' mode
    mode = '*' if options.force else 'c'

    # opening the index is the whole operation, the returned object is not needed
    _open_index(options, mode)

def cmd_load (options, channel_name, *dates) :
    """
        Loads the logs for a specific channel for the given dates (in terms of the channe logs' timezone) into the index
    """

    # open index/channel
    index, channel = _open_index_and_channel(options, channel_name, 'c' if options.create else 'a')
    
    # handle each date
    for date_str in dates :
        # prase date
        try :
            date = _parse_date(options, date_str, channel.source.tz)
        
        # handle errors
        except CommandError, e :
            if options.skip_missing :
                print "[ERROR] %s" % (date_name, e)

            else :
                raise
        
        # otherwise, load
        else :        
            _load_channel_date(index, options, channel, date)

def cmd_load_month (options, channel_name, *months) :
    """
        Loads the logs for a specific channel for the given months (in terms of the channel's timezone) into the index
    """

    # open index/channel
    index, channel = _open_index_and_channel(options, channel_name, 'c' if options.create else 'a')
    
    # handle each date
    for month_str in months :
        # prase date
        try :
            month = _parse_date(options, month_str, channel.source.tz, '%Y-%m')
        
        # handle errors
        except CommandError, e :
            # skip?
            if options.skip_missing :
                print "[ERROR] %s" % (date_name, e)
                continue

            else :
                raise
        
        # get the set of days
        days = list(channel.source.get_month_days(month))

        print "Loading %d days of logs:" % (len(days))

        # load each day
        for date in days :
            # convert to datetime
            dt = datetime.datetime.combine(date, datetime.time(0)).replace(tzinfo=channel.source.tz)
            
            # load
            _load_channel_date(index, options, channel, dt)

def cmd_search (options, channel_name, query) :
    """
        Search the index for events on a specific channel with the given query
    """

    # NOTE: the docstring above is printed by cmd_help as the command description, so it is kept short

    # sanity-check; raised as CommandError (still an Exception subclass) so that the command-line entry point
    # reports it as a usage error instead of dumping a traceback
    if options.create :
        raise CommandError("--create doesn't make sense for 'search'")

    # open index/channel read-only
    index, channel = _open_index_and_channel(options, channel_name, 'r')

    # search
    lines = index.search_simple(channel, query)

    # display
    _output_lines(options, lines)

def cmd_list (options, channel_name, *dates) :
    """
        List the indexed events for a specific date
    """

    # NOTE: the docstring above is printed by cmd_help as the command description, so it is kept short

    # sanity-check; the message names this command (it used to say 'search'), and it is raised as CommandError
    # (still an Exception subclass) so that the command-line entry point reports it without a traceback
    if options.create :
        raise CommandError("--create doesn't make sense for 'list'")

    # open index/channel read-only
    index, channel = _open_index_and_channel(options, channel_name, 'r')

    # ...for each date
    for date_str in dates :
        # parse date, in terms of the output timezone (options.tz)
        date = _parse_date(options, date_str)

        # list
        lines = index.list(channel, date)

        # display
        _output_lines(options, lines)

def cmd_autoload (options, *channel_names) :
    """
        Automatically loads all channel logs that have not been indexed yet (by logfile mtime)
    """
    
    # open index
    index = _open_index(options, 'c' if options.create else 'a')

    # default to all channels
    if not channel_names :
        channels = config.LOG_CHANNELS
    
    else :
        channels = [config.LOG_CHANNELS.lookup(channel_name) for channel_name in channel_names]

    # iterate channels
    for channel in channels :
        if not options.quiet :
            print "Channel %s:" % channel.id,

        # path to our state file
        statefile_path = os.path.join(options.autoload_state_path, 'chan-%s' % channel.id)
       
        # override?
        if options.reload :
            # load all
            mtime = None

            if not options.quiet :
                print "reloading all:",

        # stat for mtime
        else :
            # stat for mtime, None if unknown
            mtime = utils.mtime(statefile_path, ignore_missing=True)

            if mtime and not options.quiet :
                print "last load=%s:" % mtime,

            elif not options.quiet :
                print "no previous load state:",
 
        # only after some specific date?
        if options.after :
            after = options.after
            
            print "after=%s:" % after,

        else :
            after = None
        
        # get lines
        lines = channel.source.get_modified(mtime, after)
        
        # insert
        if not options.quiet :
            print "inserting..."
        
        _insert_lines(index, options, channel, lines)

        # write autoload state
        open(statefile_path, 'w').close()

def cmd_help (options, *args) :
    """
        Help about commands
    """

    import inspect
    
    # general help stuff
    options._parser.print_help()

    # specific command?
    if args :
        # the command name
        command, = args
        
        # XXX: display info about specific command
        xxx
    
    # general
    else :
        print
        print "Available commands:"

        # build list of all cmd_* objects
        cmd_objects = [(name, obj) for name, obj in globals().iteritems() if name.startswith('cmd_') and inspect.isfunction(obj)]

        # sort alphabetically
        cmd_objects.sort()
        
        # iterate through all cmd_* objects
        for cmd_func_name, cmd_func in cmd_objects :
            # remove cmd_ prefix
            cmd_name = cmd_func_name[4:]

            # inspect
            cmd_args, cmd_varargs, cmd_varkw, cmd_default = inspect.getargspec(cmd_func)
            cmd_doc = inspect.getdoc(cmd_func)

            # remove the "options" arg
            cmd_args = cmd_args[1:]

            # display
            print "\t%10s %-30s : %s" % (cmd_name, inspect.formatargspec(cmd_args, cmd_varargs, None, cmd_default), cmd_doc)

def main (argv) :
    """
        Command-line main, with given argv.

        Parses the options from argv[1:], then dispatches to the cmd_<command> function named by the first
        positional argument, passing it the options and the remaining arguments. --help is dispatched to cmd_help.

        Raises CommandError for a missing or unknown command, an unknown timezone name or an invalid --after date.
    """

    from optparse import OptionParser
    
    # define parser; the built-in help option is disabled, as --help is handled by cmd_help below
    parser = OptionParser(
        usage           = "%prog [options] <command> [ ... ]",
        add_help_option = False,
    )

    # define command-line arguments
    parser.add_option('-h', "--help",           dest="help",            help="Show this help message and exit",     action="store_true" )
    parser.add_option('-F', "--formatter",      dest="formatter_name",  help="LogFormatter to use",                 metavar="FMT",  type="choice", default="irssi",
        choices=list(config.LOG_FORMATTERS.iterkeys()))

    parser.add_option('-I', "--index",          dest="index_path",      help="Index database path",                 metavar="PATH", default="logs/index"            )
    parser.add_option(      "--autoload-state", dest="autoload_state_path", help="Path to autoload state dir",      metavar="PATH", default="logs/autoload-state"   )
    parser.add_option(      "--after",          dest="after",           help="Only autoload logfiles after the given date", metavar="DATE", default=None            )
    parser.add_option('-Z', "--timezone",       dest="tz_name",         help="Timezone for output",                 metavar="TZ",   default="UTC"                   )
    parser.add_option('-f', "--force",          dest="force",           help="Force dangerous operation",           action="store_true" )
    parser.add_option(      "--create",         dest="create",          help="Create index database",               action="store_true" )
    parser.add_option(      "--skip-missing",   dest="skip_missing",    help="Skip missing logfiles",               action="store_true" )
    parser.add_option(      "--reload",         dest="reload",          help="Force reload lines",                  action="store_true" )
    parser.add_option(      "--quiet",          dest="quiet",           help="Supress status messages",             action="store_true" )

    # parse
    options, args = parser.parse_args(argv[1:])

    # postprocess stuff; the parser is kept on the options so that cmd_help can print the usage
    options._parser = parser

    try :
        options.tz = pytz.timezone(options.tz_name)

    except pytz.UnknownTimeZoneError :
        # a mistyped --timezone is a usage error, not a crash
        raise CommandError("Unknown timezone: %s" % options.tz_name)

    options.formatter = config.LOG_FORMATTERS[options.formatter_name](options.tz, "%H:%M:%S", None, None)

    # --after is given as a date string, replace it with the parsed datetime
    if options.after :
        options.after = _parse_date(options, options.after)

    # special-case --help
    if options.help :
        return cmd_help(options, *args)
    
    # must have at least the command argument
    if not args :
        raise CommandError("Missing command")
    
    # pop command
    command = args.pop(0)
    
    # get func
    func = globals().get('cmd_%s' % command)
    
    # unknown command?
    if not func :
        raise CommandError("Unknown command: %s" % command)
    
    # call
    func(options, *args)

if __name__ == '__main__' :
    try :
        main(sys.argv)
        sys.exit(0)

    except CommandError, e :
        print e
        sys.exit(1)