--- a/scripts/search-index Sun Sep 13 00:49:55 2009 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,640 +0,0 @@
-#!/usr/bin/env python2.5
-
-"""
- Tool for accessing the search index
-"""
-
-# XXX: fix path
-import sys; sys.path.insert(0, '.'); sys.path.insert(0, '..')
-
-import os, os.path, fcntl
-import datetime, pytz
-import optparse
-
-# configuration and the LogSearchIndex module
-import config, utils, log_search, channels
-
-def _open_index (options, open_mode) :
- """
- Opens the LogSearchIndex
- """
-
- return log_search.LogSearchIndex(config.LOG_CHANNELS, options.index_path, open_mode)
-
-
-def _open_index_and_channel (options, channel_name, open_mode) :
- """
- Opens+returns a LogSearchIndex and a LogChannel
- """
-
- # open the LogSearchIndex
- index = _open_index(options, open_mode)
-
- # open the channel
- channel = config.LOG_CHANNELS.lookup(channel_name)
-
- # return
- return index, channel
-
-def _iter_insert_stats (index, channel, lines) :
- """
- Insert the given lines into the index.
-
- Assumes the lines will be in time-order, and yields a series of (date, count) tuples for every date that lines
- are inserted for
- """
-
- # last date
- date = None
-
- # count
- count = 0
-
- # iter lines
- for line in lines :
- # next day?
- if not date or line.timestamp.date() != date :
- if date :
- # yield stats
- yield date, count
-
- # reset count
- count = 0
-
- # timestamp's date
- date = line.timestamp.date()
-
- # insert
- index.insert_line(channel, line)
-
- # count
- count += 1
-
- # final count?
- if date and count :
- yield date, count
-
-def _insert_lines (index, options, channel, lines) :
- """
- Insert the given lines into the index.
-
- Assumes the lines will be in time-order, and prints out as status messages the date and count for the inserted lines
- """
-
- # iterate insert stats
- for date, count in _iter_insert_stats(index, channel, lines) :
- # output date header?
- if not options.quiet :
- print "%s: %s" % (date.strftime('%Y-%m-%d'), count),
-
-def _load_channel_date (index, options, channel, date) :
- """
- Loads the logs for the given date from the channel's LogSource into the given LogSearchIndex
- """
-
- if not options.quiet :
- print "Loading date for channel %s" % channel.id
-
- try :
- # load lines for date
- lines = channel.source.get_date(date)
-
- except Exception, e :
- if not options.skip_missing :
- raise
-
- if not options.quiet :
- print "\tSkipped: %s" % (e, )
-
- else :
- # insert
- _insert_lines(index, options, channel, lines)
-
-def _parse_date (options, date_str, tz=None, fmt='%Y-%m-%d') :
- """
- Parse the given datetime, using the given timezone(defaults to options.tz) and format
- """
-
- # default tz
- if not tz :
- tz = options.timezone
-
- try :
- # parse
- return datetime.datetime.strptime(date_str, fmt).replace(tzinfo=tz)
-
- except Exception, e :
- raise CommandError("[ERROR] Invalid date: %s: %s" % (date_str, e))
-
-def _output_lines (options, lines) :
- """
- Display the formatted LogLines
- """
-
- # display as plaintext
- for line, txt_data in options.formatter.format_txt(lines, full_timestamps=True) :
- print txt_data
-
-class CommandError (Exception) :
- """
- Error with command-line arguments
- """
-
- pass
-
-def cmd_create (options) :
- """
- Creates a new index
- """
-
- # open index
- index = _open_index(options, 'ctrunc' if options.force else 'c')
-
- # that's all
- pass
-
-def cmd_load (options, channel_name, *dates) :
- """
- Loads the logs for a specific channel for the given dates (in terms of the channe logs' timezone) into the index
- """
-
- # open index/channel
- index, channel = _open_index_and_channel(options, channel_name, 'c' if options.create else 'a')
-
- # handle each date
- for date_str in dates :
- # prase date
- try :
- date = _parse_date(options, date_str, channel.source.tz)
-
- # handle errors
- except CommandError, e :
- if options.skip_missing :
- print "[ERROR] %s" % (date_name, e)
-
- else :
- raise
-
- # otherwise, load
- else :
- _load_channel_date(index, options, channel, date)
-
-def cmd_load_month (options, channel_name, *months) :
- """
- Loads the logs for a specific channel for the given months (in terms of the channel's timezone) into the index
- """
-
- # open index/channel
- index, channel = _open_index_and_channel(options, channel_name, 'c' if options.create else 'a')
-
- # handle each date
- for month_str in months :
- # prase date
- try :
- month = _parse_date(options, month_str, channel.source.tz, '%Y-%m')
-
- # handle errors
- except CommandError, e :
- # skip?
- if options.skip_missing :
- if not options.quiet :
- print "[ERROR] %s" % (date_name, e)
- continue
-
- else :
- raise
-
- # get the set of days
- days = list(channel.source.get_month_days(month))
-
- if not options.quiet :
- print "Loading %d days of logs:" % (len(days))
-
- # load each day
- for date in days :
- # convert to datetime
- dt = datetime.datetime.combine(date, datetime.time(0)).replace(tzinfo=channel.source.tz)
-
- # load
- _load_channel_date(index, options, channel, dt)
-
-def cmd_search (options, channel_name, query) :
- """
- Search the index for events on a specific channel with the given query
- """
-
- # sanity-check
- if options.create :
- raise Exception("--create doesn't make sense for 'search'")
-
- # open index/channel
- index, channel = _open_index_and_channel(options, channel_name, 'r')
-
- # search
- lines = index.search_simple(channel, query)
-
- # display
- _output_lines(options, lines)
-
-def cmd_list (options, channel_name, *dates) :
- """
- List the indexed events for a specific date
- """
-
- # sanity-check
- if options.create :
- raise Exception("--create doesn't make sense for 'search'")
-
- # open index/channel
- index, channel = _open_index_and_channel(options, channel_name, 'r')
-
- # ...for each date
- for date_str in dates :
- # parse date
- date = _parse_date(options, date_str)
-
- # list
- lines = index.list(channel, date)
-
- # display
- _output_lines(options, lines)
-
-def _autoload_reset (options, channels) :
- """
- Reset old autoload state
- """
-
- # warn
- if not options.quiet :
- print "[WARN] Resetting autoload state for: %s" % ', '.join(channel.id for channel in channels)
-
- # iter
- for channel in channels :
- # statefile path
- statefile_path = os.path.join(options.autoload_state_path, 'chan-%s' % channel.id)
-
- # is it present?
- if not os.path.exists(statefile_path) :
- if not options.quiet :
- print "[WARN] No statefile found at %s" % statefile_path
-
- else :
- if not options.quiet :
- print "\t%s: " % channel.id,
-
- # remove the statefile
- os.remove(statefile_path)
-
- if not options.quiet :
- print "OK"
-
-def cmd_autoload (options, *channel_names) :
- """
- Automatically loads all channel logs that have not been indexed yet (by logfile mtime)
- """
-
- # open index, nonblocking
- index = _open_index(options, 'c?' if options.create else 'a?')
-
- # default to all channels
- if not channel_names :
- channels = config.LOG_CHANNELS
-
- else :
- channels = [config.LOG_CHANNELS.lookup(channel_name) for channel_name in channel_names]
-
- # reset autoload state?
- if options.reset :
- _autoload_reset(options, channels)
- if not options.quiet :
- print
-
- # iterate channels
- for channel in channels :
- if not options.quiet :
- print "Channel %s:" % channel.id
-
- # no 'from' by default
- after = None
-
- # path to our state file
- statefile_path = os.path.join(options.autoload_state_path, 'chan-%s' % channel.id)
- statefile_tmppath = statefile_path + '.tmp'
-
- # does it exist?
- have_tmpfile = os.path.exists(statefile_tmppath)
-
- # do we have a tempfile from a previous crash?
- if have_tmpfile and not options.ignore_resume :
- # first, open it...
- statefile_tmp = open(statefile_tmppath, 'r+')
-
- # ... then lock it
- fcntl.lockf(statefile_tmp, fcntl.LOCK_EX | fcntl.LOCK_NB)
-
- # read after timestamp
- after_str = statefile_tmp.read().rstrip()
-
- if after_str :
- # parse timestamp
- after = utils.from_utc_timestamp(int(after_str))
-
- if not options.quiet :
- print "\tContinuing earlier progress from %s" % after
-
- else :
- # ignore
- if not options.quiet :
- print "\t[WARN] Ignoring empty temporary statefile"
-
- else :
- # warn about old tmpfile that was ignored
- if have_tmpfile and not options.quiet :
- print "\t[WARN] Ignoring old tmpfile state"
-
- # open new tempfile
- statefile_tmp = open(statefile_tmppath, 'w')
-
- # lock
- fcntl.lockf(statefile_tmp, fcntl.LOCK_EX | fcntl.LOCK_NB)
-
- # override?
- if options.reload :
- # load all
- mtime = None
-
- if not options.quiet :
- print "\tForcing reload!"
-
- # stat for mtime
- else :
- # stat for mtime, None if unknown
- mtime = utils.mtime(statefile_path, ignore_missing=True)
-
- if mtime and not options.quiet :
- print "\tLast load time was %s" % mtime
-
- elif not options.quiet :
- print "\t[WARN] No previous load state! Loading full logs"
-
- # only after some specific date?
- if options.after :
- # use unless read from tempfile
- if not after :
- after = options.after
-
- if not options.quiet :
- print "\tOnly including dates from %s onwards" % after
-
- else :
- if not options.quiet :
- print "\t[WARN] Ignoring --from because we found a tempfile"
-
- # only up to some specific date?
- if options.until :
- until = options.until
-
- if not options.quiet :
- print "\tOnly including dates up to (and including) %s" % until
- else :
- # default to now
- until = None
-
- # get lines
- lines = channel.source.get_modified(mtime, after, until)
-
- # insert
- if not options.quiet :
- print "\tLoading and inserting..."
- print
-
- # iterate insert() per day to display info and update progress
- for date, count in _iter_insert_stats(index, channel, lines) :
- # output date header?
- if not options.quiet :
- print "\t%10s: %d" % (date.strftime('%Y-%m-%d'), count)
-
- # write temp state
- statefile_tmp.seek(0)
- statefile_tmp.write(str(utils.to_utc_timestamp(datetime.datetime.combine(date, datetime.time(0)))))
- statefile_tmp.flush()
-
- # write autoload state
- open(statefile_path, 'w').close()
-
- # close+delete tempfile
- statefile_tmp.close()
- os.remove(statefile_tmppath)
-
- if not options.quiet :
- print
-
- # done
- return
-
-def cmd_help (options, *args) :
- """
- Help about commands
- """
-
- import inspect
-
- # general help stuff
- options._parser.print_help()
-
- # specific command?
- if args :
- # the command name
- command, = args
-
- # XXX: display info about specific command
- xxx
-
- # general
- else :
- print
- print "Available commands:"
-
- # build list of all cmd_* objects
- cmd_objects = [(name, obj) for name, obj in globals().iteritems() if name.startswith('cmd_') and inspect.isfunction(obj)]
-
- # sort alphabetically
- cmd_objects.sort()
-
- # iterate through all cmd_* objects
- for cmd_func_name, cmd_func in cmd_objects :
- # remove cmd_ prefix
- cmd_name = cmd_func_name[4:]
-
- # inspect
- cmd_args, cmd_varargs, cmd_varkw, cmd_default = inspect.getargspec(cmd_func)
- cmd_doc = inspect.getdoc(cmd_func)
-
- # remove the "options" arg
- cmd_args = cmd_args[1:]
-
- # display
- print "\t%10s %-30s : %s" % (cmd_name, inspect.formatargspec(cmd_args, cmd_varargs, None, cmd_default), cmd_doc)
-
-class MyOption (optparse.Option) :
- """
- Our custom types for optparse
- """
-
- def check_date (option, opt, value) :
- """
- Parse a date
- """
-
- try :
- # parse
- return datetime.datetime.strptime(value, '%Y-%m-%d')
-
- # trap -> OptionValueError
- except Exception, e :
- raise optparse.OptionValueError("option %s: invalid date value: %r" % (opt, value))
-
- def check_timezone (option, opt, value) :
- """
- Parse a timezone
- """
-
- try :
- # parse
- return pytz.timezone(value)
-
- # trap -> OptionValueError
- except Exception, e :
- raise optparse.OptionValueError("option %s: invalid timezone: %r" % (opt, value))
-
- def take_action (self, action, dest, opt, value, values, parser) :
- """
- Override take_action to handle date
- """
-
- if action == "parse_date" :
- # get timezone
- tz = values.timezone
-
- # set timezone
- value = value.replace(tzinfo=tz)
-
- # store
- return optparse.Option.take_action(self, 'store', dest, opt, value, values, parser)
-
- else :
- # default
- return optparse.Option.take_action(self, action, dest, opt, value, values, parser)
-
- TYPES = optparse.Option.TYPES + ('date', 'timezone')
- TYPE_CHECKER = optparse.Option.TYPE_CHECKER.copy()
- TYPE_CHECKER['date'] = check_date
- TYPE_CHECKER['timezone'] = check_timezone
- ACTIONS = optparse.Option.ACTIONS + ('parse_date', )
- STORE_ACTIONS = optparse.Option.STORE_ACTIONS + ('parse_date', )
- TYPED_ACTIONS = optparse.Option.TYPED_ACTIONS + ('parse_date', )
- ACTIONS = optparse.Option.ACTIONS + ('parse_date', )
-
-def main (argv) :
- """
- Command-line main, with given argv
- """
-
- # define parser
- parser = optparse.OptionParser(
- usage = "%prog [options] <command> [ ... ]",
- add_help_option = False,
- option_class = MyOption,
- )
-
- # general options # # # #
- general = optparse.OptionGroup(parser, "General Options")
- general.add_option('-h', "--help", dest="help", help="Show this help message and exit",
- action="store_true" )
-
- general.add_option( "--formatter", dest="formatter_name", help="LogFormatter to use",
- metavar="FMT", type="choice", default=config.PREF_FORMATTER_DEFAULT.name,
- choices=[fmt_name for fmt_name in config.LOG_FORMATTERS.iterkeys()] )
-
- general.add_option( "--index", dest="index_path", help="Index database path",
- metavar="PATH", default=config.SEARCH_INDEX_PATH )
-
- general.add_option( "--timezone", dest="timezone", help="Timezone for output",
- metavar="TZ", type="timezone", default=pytz.utc )
-
- general.add_option( "--force", dest="force", help="Force dangerous operation",
- action="store_true" )
-
- general.add_option( "--quiet", dest="quiet", help="Supress status messages",
- action="store_true" )
- parser.add_option_group(general)
-
-
- # cmd_load options # # # #
- load = optparse.OptionGroup(parser, "Load Options")
- load.add_option( "--skip-missing", dest="skip_missing", help="Skip missing logfiles",
- action="store_true" )
-
- load.add_option( "--create", dest="create", help="Create index database",
- action="store_true" )
- parser.add_option_group(load)
-
-
- # cmd_autoload options # # # #
- autoload = optparse.OptionGroup(parser, "Autoload Options")
- autoload.add_option( "--autoload-state", dest="autoload_state_path", help="Path to autoload state dir",
- metavar="PATH", default=config.SEARCH_AUTOINDEX_PATH)
-
- autoload.add_option( "--from", dest="after", help="Only autoload logfiles from the given date on",
- metavar="DATE", type="date", action="parse_date", default=None )
-
- autoload.add_option( "--until", dest="until", help="Only autoload logfiles up to (and including) the given date",
- metavar="DATE", type="date", action="parse_date", default=None )
-
- autoload.add_option( "--reload", dest="reload", help="Force reload lines",
- action="store_true" )
-
- autoload.add_option( "--reset", dest="reset", help="Reset old autload state",
- action="store_true" )
-
- autoload.add_option( "--ignore-resume", dest="ignore_resume", help="Do not try and resume interrupted autoload",
- action="store_true" )
- parser.add_option_group(autoload)
-
- # parse
- options, args = parser.parse_args(argv[1:])
-
- # postprocess stuff
- options._parser = parser
- options.formatter = config.LOG_FORMATTERS[options.formatter_name](options.timezone, "%H:%M:%S", None, None)
-
- # special-case --help
- if options.help :
- return cmd_help(options, *args)
-
- # must have at least the command argument
- if not args :
- raise CommandError("Missing command")
-
- # pop command
- command = args.pop(0)
-
- # get func
- func = globals().get('cmd_%s' % command)
-
- # unknown command?
- if not func :
- raise CommandError("Unknown command: %s" % command)
-
- # call
- func(options, *args)
-
-if __name__ == '__main__' :
- try :
- main(sys.argv)
- sys.exit(0)
-
- except CommandError, e :
- print e
- sys.exit(1)
-