terom@88: #!/usr/bin/env python2.5 terom@88: terom@65: """ terom@65: Tool for accessing the search index terom@65: """ terom@65: terom@82: # XXX: fix path terom@65: import sys; sys.path.insert(0, '.'); sys.path.insert(0, '..') terom@65: terom@93: import os, os.path terom@65: import datetime, pytz terom@65: terom@82: # configuration and the LogSearchIndex module terom@93: import config, utils, log_search, channels terom@82: terom@88: def _open_index (options, open_mode) : terom@88: """ terom@88: Opens the LogSearchIndex terom@88: """ terom@88: terom@88: return log_search.LogSearchIndex(config.LOG_CHANNELS, options.index_path, open_mode) terom@88: terom@88: terom@82: def _open_index_and_channel (options, channel_name, open_mode) : terom@82: """ terom@82: Opens+returns a LogSearchIndex and a LogChannel terom@82: """ terom@88: terom@82: # open the LogSearchIndex terom@88: index = _open_index(options, open_mode) terom@82: terom@82: # open the channel terom@82: channel = config.LOG_CHANNELS.lookup(channel_name) terom@82: terom@82: # return terom@82: return index, channel terom@82: terom@93: def _insert_lines (index, options, channel, lines) : terom@93: """ terom@93: Insert the given lines into the index. terom@93: terom@93: Assumes the lines will be in time-order, and prints out as status messages the date and count for the inserted lines terom@93: """ terom@93: terom@93: # last date terom@93: date = None terom@93: terom@93: # count terom@93: count = 0 terom@93: terom@93: # iter lines terom@93: for line in lines : terom@93: # output new date header? terom@93: if not options.quiet and (not date or line.timestamp.date() != date) : terom@93: # previous date's line count? terom@93: if date : terom@93: print "OK: %d lines" % count terom@93: terom@93: # reset count terom@93: count = 0 terom@93: terom@93: # timestamp's date terom@93: date = line.timestamp.date() terom@93: terom@93: # status header terom@93: print "%s:" % (date.strftime('%Y-%m-%d'), ), terom@93: terom@93: # insert terom@93: index.insert_line(channel, line) terom@93: terom@93: # count terom@93: count += 1 terom@93: terom@93: # final count line terom@93: if not options.quiet and date : terom@93: print "OK: %d lines" % count terom@93: terom@82: def _load_channel_date (index, options, channel, date) : terom@82: """ terom@82: Loads the logs for the given date from the channel's LogSource into the given LogSearchIndex terom@82: """ terom@82: terom@82: if not options.quiet : terom@93: print "Loading date for channel %s" % channel.id terom@82: terom@82: try : terom@82: # load lines for date terom@82: lines = channel.source.get_date(date) terom@82: terom@82: except Exception, e : terom@82: if not options.skip_missing : terom@82: raise terom@82: terom@82: if not options.quiet : terom@93: print "\tSkipped: %s" % (e, ) terom@82: terom@82: else : terom@93: # insert terom@93: _insert_lines(index, options, channel, lines) terom@89: terom@89: def _parse_date (options, date_str, tz=None, fmt='%Y-%m-%d') : terom@89: """ terom@89: Parse the given datetime, using the given timezone(defaults to options.tz) and format terom@89: """ terom@89: terom@89: # default tz terom@89: if not tz : terom@89: tz = options.tz terom@89: terom@89: try : terom@89: # parse terom@89: return datetime.datetime.strptime(date_str, fmt).replace(tzinfo=tz) terom@89: terom@89: except Exception, e : terom@89: raise CommandError("[ERROR] Invalid date: %s: %s" % (date_str, e)) terom@89: terom@89: def _output_lines (options, lines) : terom@89: """ terom@89: Display the formatted LogLines terom@89: """ terom@89: terom@89: # display as plaintext terom@89: for line, txt_data in options.formatter.format_txt(lines, full_timestamps=True) : terom@89: print txt_data terom@89: terom@88: class CommandError (Exception) : terom@88: """ terom@88: Error with command-line arguments terom@88: """ terom@88: terom@88: pass terom@88: terom@88: def cmd_create (options) : terom@88: """ terom@88: Creates a new index terom@88: """ terom@88: terom@88: # open index terom@88: index = _open_index(options, 'c' if not options.force else '*') terom@88: terom@88: # that's all terom@88: pass terom@88: terom@65: def cmd_load (options, channel_name, *dates) : terom@65: """ terom@82: Loads the logs for a specific channel for the given dates (in terms of the channe logs' timezone) into the index terom@65: """ terom@65: terom@82: # open index/channel terom@88: index, channel = _open_index_and_channel(options, channel_name, 'c' if options.create else 'a') terom@65: terom@82: # handle each date terom@89: for date_str in dates : terom@89: # prase date terom@68: try : terom@89: date = _parse_date(options, date_str, channel.source.tz) terom@89: terom@89: # handle errors terom@89: except CommandError, e : terom@82: if options.skip_missing : terom@89: print "[ERROR] %s" % (date_name, e) terom@82: terom@82: else : terom@82: raise terom@68: terom@89: # otherwise, load terom@89: else : terom@89: _load_channel_date(index, options, channel, date) terom@65: terom@82: def cmd_load_month (options, channel_name, *months) : terom@82: """ terom@82: Loads the logs for a specific channel for the given months (in terms of the channel's timezone) into the index terom@82: """ terom@82: terom@82: # open index/channel terom@88: index, channel = _open_index_and_channel(options, channel_name, 'c' if options.create else 'a') terom@82: terom@82: # handle each date terom@89: for month_str in months : terom@89: # prase date terom@82: try : terom@89: month = _parse_date(options, month_str, channel.source.tz, '%Y-%m') terom@89: terom@89: # handle errors terom@89: except CommandError, e : terom@89: # skip? terom@82: if options.skip_missing : terom@89: print "[ERROR] %s" % (date_name, e) terom@82: continue terom@82: terom@82: else : terom@82: raise terom@82: terom@82: # get the set of days terom@83: days = list(channel.source.get_month_days(month)) terom@82: terom@82: print "Loading %d days of logs:" % (len(days)) terom@82: terom@82: # load each day terom@82: for date in days : terom@82: # convert to datetime terom@82: dt = datetime.datetime.combine(date, datetime.time(0)).replace(tzinfo=channel.source.tz) terom@82: terom@82: # load terom@82: _load_channel_date(index, options, channel, dt) terom@65: terom@65: def cmd_search (options, channel_name, query) : terom@65: """ terom@65: Search the index for events on a specific channel with the given query terom@65: """ terom@65: terom@82: # sanity-check terom@89: if options.create : terom@82: raise Exception("--create doesn't make sense for 'search'") terom@82: terom@82: # open index/channel terom@82: index, channel = _open_index_and_channel(options, channel_name, 'r') terom@65: terom@65: # search terom@65: lines = index.search_simple(channel, query) terom@65: terom@89: # display terom@89: _output_lines(options, lines) terom@89: terom@89: def cmd_list (options, channel_name, *dates) : terom@89: """ terom@89: List the indexed events for a specific date terom@89: """ terom@89: terom@89: # sanity-check terom@89: if options.create : terom@89: raise Exception("--create doesn't make sense for 'search'") terom@89: terom@89: # open index/channel terom@89: index, channel = _open_index_and_channel(options, channel_name, 'r') terom@89: terom@89: # ...for each date terom@89: for date_str in dates : terom@89: # parse date terom@89: date = _parse_date(options, date_str) terom@89: terom@89: # list terom@89: lines = index.list(channel, date) terom@89: terom@89: # display terom@89: _output_lines(options, lines) terom@65: terom@93: def cmd_autoload (options, *channel_names) : terom@93: """ terom@93: Automatically loads all channel logs that have not been indexed yet (by logfile mtime) terom@93: """ terom@93: terom@93: # open index terom@93: index = _open_index(options, 'c' if options.create else 'a') terom@93: terom@93: # default to all channels terom@93: if not channel_names : terom@93: channels = config.LOG_CHANNELS terom@93: terom@93: else : terom@93: channels = [config.LOG_CHANNELS.lookup(channel_name) for channel_name in channel_names] terom@93: terom@93: # iterate channels terom@93: for channel in channels : terom@93: if not options.quiet : terom@93: print "Channel %s:" % channel.id, terom@93: terom@93: # path to our state file terom@93: statefile_path = os.path.join(options.autoload_state_path, 'chan-%s' % channel.id) terom@94: terom@93: # override? terom@93: if options.reload : terom@93: # load all terom@93: mtime = None terom@93: terom@93: if not options.quiet : terom@93: print "reloading all:", terom@93: terom@93: # stat for mtime terom@95: else : terom@95: # stat for mtime, None if unknown terom@95: mtime = utils.mtime(statefile_path, ignore_missing=True) terom@95: terom@95: if mtime and not options.quiet : terom@93: print "last load=%s:" % mtime, terom@93: terom@95: elif not options.quiet : terom@93: print "no previous load state:", terom@94: terom@94: # only after some specific date? terom@94: if options.after : terom@94: after = options.after terom@94: terom@94: print "after=%s:" % after, terom@94: terom@94: else : terom@94: after = None terom@93: terom@93: # get lines terom@94: lines = channel.source.get_modified(mtime, after) terom@93: terom@93: # insert terom@93: if not options.quiet : terom@93: print "inserting..." terom@93: terom@93: _insert_lines(index, options, channel, lines) terom@93: terom@93: # write autoload state terom@93: open(statefile_path, 'w').close() terom@93: terom@88: def cmd_help (options, *args) : terom@88: """ terom@88: Help about commands terom@88: """ terom@88: terom@88: import inspect terom@88: terom@88: # general help stuff terom@88: options._parser.print_help() terom@88: terom@88: # specific command? terom@88: if args : terom@88: # the command name terom@88: command, = args terom@88: terom@88: # XXX: display info about specific command terom@88: xxx terom@88: terom@88: # general terom@88: else : terom@88: print terom@88: print "Available commands:" terom@88: terom@88: # build list of all cmd_* objects terom@88: cmd_objects = [(name, obj) for name, obj in globals().iteritems() if name.startswith('cmd_') and inspect.isfunction(obj)] terom@88: terom@88: # sort alphabetically terom@88: cmd_objects.sort() terom@88: terom@88: # iterate through all cmd_* objects terom@88: for cmd_func_name, cmd_func in cmd_objects : terom@88: # remove cmd_ prefix terom@88: cmd_name = cmd_func_name[4:] terom@88: terom@88: # inspect terom@88: cmd_args, cmd_varargs, cmd_varkw, cmd_default = inspect.getargspec(cmd_func) terom@88: cmd_doc = inspect.getdoc(cmd_func) terom@88: terom@88: # remove the "options" arg terom@88: cmd_args = cmd_args[1:] terom@88: terom@88: # display terom@88: print "\t%10s %-30s : %s" % (cmd_name, inspect.formatargspec(cmd_args, cmd_varargs, None, cmd_default), cmd_doc) terom@88: terom@88: def main (argv) : terom@88: """ terom@88: Command-line main, with given argv terom@88: """ terom@88: terom@65: from optparse import OptionParser terom@65: terom@65: # define parser terom@65: parser = OptionParser( terom@65: usage = "%prog [options] [ ... ]", terom@88: add_help_option = False, terom@65: ) terom@65: terom@65: # define command-line arguments terom@94: parser.add_option('-h', "--help", dest="help", help="Show this help message and exit", action="store_true" ) terom@88: parser.add_option('-F', "--formatter", dest="formatter_name", help="LogFormatter to use", metavar="FMT", type="choice", default="irssi", terom@88: choices=[fmt_name for fmt_name in config.LOG_FORMATTERS.iterkeys()]) terom@88: terom@94: parser.add_option('-I', "--index", dest="index_path", help="Index database path", metavar="PATH", default="logs/index" ) terom@94: parser.add_option( "--autoload-state", dest="autoload_state_path", help="Path to autoload state dir", metavar="PATH", default="logs/autoload-state" ) terom@94: parser.add_option( "--after", dest="after", help="Only autoload logfiles after the given date", metavar="DATE", default=None ) terom@94: parser.add_option('-Z', "--timezone", dest="tz_name", help="Timezone for output", metavar="TZ", default="UTC" ) terom@94: parser.add_option('-f', "--force", dest="force", help="Force dangerous operation", action="store_true" ) terom@94: parser.add_option( "--create", dest="create", help="Create index database", action="store_true" ) terom@94: parser.add_option( "--skip-missing", dest="skip_missing", help="Skip missing logfiles", action="store_true" ) terom@94: parser.add_option( "--reload", dest="reload", help="Force reload lines", action="store_true" ) terom@94: parser.add_option( "--quiet", dest="quiet", help="Supress status messages", action="store_true" ) terom@65: terom@65: # parse terom@88: options, args = parser.parse_args(argv[1:]) terom@65: terom@65: # postprocess stuff terom@88: options._parser = parser terom@65: options.tz = pytz.timezone(options.tz_name) terom@82: options.formatter = config.LOG_FORMATTERS[options.formatter_name](options.tz, "%H:%M:%S", None, None) terom@88: terom@94: if options.after : terom@94: options.after = _parse_date(options, options.after) terom@94: terom@88: # special-case --help terom@88: if options.help : terom@88: return cmd_help(options, *args) terom@88: terom@88: # must have at least the command argument terom@88: if not args : terom@88: raise CommandError("Missing command") terom@65: terom@65: # pop command terom@65: command = args.pop(0) terom@88: terom@88: # get func terom@88: func = globals().get('cmd_%s' % command) terom@88: terom@88: # unknown command? terom@88: if not func : terom@88: raise CommandError("Unknown command: %s" % command) terom@65: terom@65: # call terom@65: func(options, *args) terom@65: terom@88: if __name__ == '__main__' : terom@88: try : terom@88: main(sys.argv) terom@88: sys.exit(0) terom@65: terom@88: except CommandError, e : terom@88: print e terom@88: sys.exit(1) terom@88: