scripts/search-index.py
author Tero Marttila <terom@fixme.fi>
Tue, 10 Feb 2009 05:56:57 +0200
changeset 83 a34e9f56ddda
parent 82 afd3120ec71e
permissions -rw-r--r--
improve parser resilience, improve get_month_days, add 'Channel' item to general menu
"""
    Tool for accessing the search index
"""

# XXX: fix path
import sys; sys.path.insert(0, '.'); sys.path.insert(0, '..')

import datetime, pytz

# configuration and the LogSearchIndex module
import config, log_search, channels

def _open_index_and_channel (options, channel_name, open_mode) :
    """
        Build the pair of objects that every command works on.

        The LogSearchIndex is constructed from options.index_path and the given open_mode, and the channel is
        looked up by channel_name in config.LOG_CHANNELS.

        Returns an (index, channel) tuple.
    """

    # the index is constructed first, the channel lookup follows
    return (
        log_search.LogSearchIndex(options.index_path, open_mode),
        config.LOG_CHANNELS.lookup(channel_name),
    )

def _load_channel_date (index, options, channel, date) :
    """
        Loads the logs for the given date from the channel's LogSource into the given LogSearchIndex
    """

    if not options.quiet :
        print "%s %s..." % (channel.id, date.strftime(channel.source.filename_fmt)),
        
    try :
        # load lines for date
        lines = channel.source.get_date(date)
    
    except Exception, e :
        if not options.skip_missing :
            raise
            
        if not options.quiet :
            print "Skipped: %s" % (e, )
    
    else :
        # insert -> count
        count = index.insert(channel, lines)

        if not options.quiet :
            print "OK: %d lines" % count

def cmd_load (options, channel_name, *dates) :
    """
        Loads the logs for a specific channel for the given dates (in terms of the channe logs' timezone) into the index
    """

    # open index/channel
    index, channel = _open_index_and_channel(options, channel_name, '*' if options.create_index else 'a')
    
    # handle each date
    for date_name in dates :
        try :
            # parse date
            date = datetime.datetime.strptime(date_name, '%Y-%m-%d').replace(tzinfo=channel.source.tz)

        except Exception, e :
            print "[ERROR] Invalid date: %s: %s" % (date_name, e)

            if options.skip_missing :
                continue

            else :
                raise
        
        # load
        _load_channel_date(index, options, channel, date)

def cmd_load_month (options, channel_name, *months) :
    """
        Loads the logs for a specific channel for the given months (in terms of the channel's timezone) into the index
    """

    # open index/channel
    index, channel = _open_index_and_channel(options, channel_name, '*' if options.create_index else 'a')
    
    # handle each date
    for month_name in months :
        try :
            # parse date
            month = datetime.datetime.strptime(month_name, '%Y-%m').replace(tzinfo=channel.source.tz)

        except Exception, e :
            print "[ERROR] Invalid date: %s: %s" % (month_name, e)

            if options.skip_missing :
                continue

            else :
                raise
        
        # get the set of days
        days = list(channel.source.get_month_days(month))

        print "Loading %d days of logs:" % (len(days))

        # load each day
        for date in days :
            # convert to datetime
            dt = datetime.datetime.combine(date, datetime.time(0)).replace(tzinfo=channel.source.tz)
            
            # load
            _load_channel_date(index, options, channel, dt)

def cmd_search (options, channel_name, query) :
    """
        Search the index for events on a specific channel with the given query
    """
    
    # sanity-check
    if options.create_index :
        raise Exception("--create doesn't make sense for 'search'")
    
    # open index/channel
    index, channel = _open_index_and_channel(options, channel_name, 'r')
    
    # search
    lines = index.search_simple(channel, query)
    
    # display as plaintext
    for line in options.formatter.format_txt(lines) :
        print line

if __name__ == '__main__' :
    # Command-line entry point: parse the options, then dispatch to the cmd_<command> function named by the
    # first positional argument, passing the remaining positional arguments through to it.
    from optparse import OptionParser
    
    # define parser
    parser = OptionParser(
        usage           = "%prog [options] <command> [ ... ]",
        add_help_option = True,
    )

    # define command-line arguments
    parser.add_option("-I", "--index", dest="index_path", help="Index database path", metavar="PATH", default="logs/index")
    parser.add_option("--create", dest="create_index", action="store_true", help="Create index database")
    parser.add_option("-f", "--formatter", dest="formatter_name", help="LogFormatter to use", default="irssi")
    parser.add_option("-z", "--timezone", dest="tz_name", help="Timezone for output", metavar="TZ", default="UTC")
    parser.add_option("--skip-missing", dest="skip_missing", action="store_true", help="Skip missing logfiles")
    parser.add_option("--quiet", dest="quiet", action="store_true", help="Supress status messages")

    # parse
    options, args = parser.parse_args()

    # postprocess stuff: resolve the timezone name, and instantiate the formatter looked up by name
    options.tz = pytz.timezone(options.tz_name)
    options.formatter = config.LOG_FORMATTERS[options.formatter_name](options.tz, "%H:%M:%S", None, None)
    
    # pop command; parser.error() prints the usage plus the message and exits, instead of a traceback
    if not args :
        parser.error("Missing command")

    command = args.pop(0)

    # inspect: commands are the module-level cmd_* functions; a hyphenated spelling such as 'load-month' is
    # accepted for cmd_load_month as well
    func = globals().get('cmd_%s' % command.replace('-', '_'))

    if not callable(func) :
        # report the unknown command together with the names that do exist
        known = sorted(name[len('cmd_'):] for name in globals() if name.startswith('cmd_'))

        parser.error("Unknown command: %s (available: %s)" % (command, ', '.join(known)))
    
    # call
    func(options, *args)