scripts/search-index
changeset 88 0b8e2ba5f76f
parent 83 a34e9f56ddda
child 89 2dc6de43f317
equal deleted inserted replaced
87:39915772f090 88:0b8e2ba5f76f
       
     1 #!/usr/bin/env python2.5
       
     2 
       
     3 """
       
     4     Tool for accessing the search index
       
     5 """
       
     6 
       
     7 # XXX: fix path
       
     8 import sys; sys.path.insert(0, '.'); sys.path.insert(0, '..')
       
     9 
       
    10 import datetime, pytz
       
    11 
       
    12 # configuration and the LogSearchIndex module
       
    13 import config, log_search, channels
       
    14 
       
    15 def _open_index (options, open_mode) :
       
    16     """
       
    17         Opens the LogSearchIndex
       
    18     """
       
    19 
       
    20     return log_search.LogSearchIndex(config.LOG_CHANNELS, options.index_path, open_mode)
       
    21 
       
    22 
       
    23 def _open_index_and_channel (options, channel_name, open_mode) :
       
    24     """
       
    25         Opens+returns a LogSearchIndex and a LogChannel
       
    26     """
       
    27     
       
    28     # open the LogSearchIndex
       
    29     index = _open_index(options, open_mode)
       
    30 
       
    31     # open the channel
       
    32     channel = config.LOG_CHANNELS.lookup(channel_name)
       
    33     
       
    34     # return
       
    35     return index, channel
       
    36 
       
    37 def _load_channel_date (index, options, channel, date) :
       
    38     """
       
    39         Loads the logs for the given date from the channel's LogSource into the given LogSearchIndex
       
    40     """
       
    41 
       
    42     if not options.quiet :
       
    43         print "%s %s..." % (channel.id, date.strftime(channel.source.filename_fmt)),
       
    44         
       
    45     try :
       
    46         # load lines for date
       
    47         lines = channel.source.get_date(date)
       
    48     
       
    49     except Exception, e :
       
    50         if not options.skip_missing :
       
    51             raise
       
    52             
       
    53         if not options.quiet :
       
    54             print "Skipped: %s" % (e, )
       
    55     
       
    56     else :
       
    57         # insert -> count
       
    58         count = index.insert(channel, lines)
       
    59 
       
    60         if not options.quiet :
       
    61             print "OK: %d lines" % count
       
    62 
       
    63 class CommandError (Exception) :
       
    64     """
       
    65         Error with command-line arguments
       
    66     """
       
    67 
       
    68     pass
       
    69 
       
    70 def cmd_create (options) :
       
    71     """
       
    72         Creates a new index
       
    73     """
       
    74 
       
    75     # open index
       
    76     index = _open_index(options, 'c' if not options.force else '*')
       
    77 
       
    78     # that's all
       
    79     pass
       
    80 
       
    81 def cmd_load (options, channel_name, *dates) :
       
    82     """
       
    83         Loads the logs for a specific channel for the given dates (in terms of the channe logs' timezone) into the index
       
    84     """
       
    85 
       
    86     # open index/channel
       
    87     index, channel = _open_index_and_channel(options, channel_name, 'c' if options.create else 'a')
       
    88     
       
    89     # handle each date
       
    90     for date_name in dates :
       
    91         try :
       
    92             # parse date
       
    93             date = datetime.datetime.strptime(date_name, '%Y-%m-%d').replace(tzinfo=channel.source.tz)
       
    94 
       
    95         except Exception, e :
       
    96             print "[ERROR] Invalid date: %s: %s" % (date_name, e)
       
    97 
       
    98             if options.skip_missing :
       
    99                 continue
       
   100 
       
   101             else :
       
   102                 raise
       
   103         
       
   104         # load
       
   105         _load_channel_date(index, options, channel, date)
       
   106 
       
   107 def cmd_load_month (options, channel_name, *months) :
       
   108     """
       
   109         Loads the logs for a specific channel for the given months (in terms of the channel's timezone) into the index
       
   110     """
       
   111 
       
   112     # open index/channel
       
   113     index, channel = _open_index_and_channel(options, channel_name, 'c' if options.create else 'a')
       
   114     
       
   115     # handle each date
       
   116     for month_name in months :
       
   117         try :
       
   118             # parse date
       
   119             month = datetime.datetime.strptime(month_name, '%Y-%m').replace(tzinfo=channel.source.tz)
       
   120 
       
   121         except Exception, e :
       
   122             print "[ERROR] Invalid date: %s: %s" % (month_name, e)
       
   123 
       
   124             if options.skip_missing :
       
   125                 continue
       
   126 
       
   127             else :
       
   128                 raise
       
   129         
       
   130         # get the set of days
       
   131         days = list(channel.source.get_month_days(month))
       
   132 
       
   133         print "Loading %d days of logs:" % (len(days))
       
   134 
       
   135         # load each day
       
   136         for date in days :
       
   137             # convert to datetime
       
   138             dt = datetime.datetime.combine(date, datetime.time(0)).replace(tzinfo=channel.source.tz)
       
   139             
       
   140             # load
       
   141             _load_channel_date(index, options, channel, dt)
       
   142 
       
   143 def cmd_search (options, channel_name, query) :
       
   144     """
       
   145         Search the index for events on a specific channel with the given query
       
   146     """
       
   147     
       
   148     # sanity-check
       
   149     if options.create_index :
       
   150         raise Exception("--create doesn't make sense for 'search'")
       
   151     
       
   152     # open index/channel
       
   153     index, channel = _open_index_and_channel(options, channel_name, 'r')
       
   154     
       
   155     # search
       
   156     lines = index.search_simple(channel, query)
       
   157     
       
   158     # display as plaintext
       
   159     for line in options.formatter.format_txt(lines) :
       
   160         print line
       
   161 
       
   162 def cmd_help (options, *args) :
       
   163     """
       
   164         Help about commands
       
   165     """
       
   166 
       
   167     import inspect
       
   168     
       
   169     # general help stuff
       
   170     options._parser.print_help()
       
   171 
       
   172     # specific command?
       
   173     if args :
       
   174         # the command name
       
   175         command, = args
       
   176         
       
   177         # XXX: display info about specific command
       
   178         xxx
       
   179     
       
   180     # general
       
   181     else :
       
   182         print
       
   183         print "Available commands:"
       
   184 
       
   185         # build list of all cmd_* objects
       
   186         cmd_objects = [(name, obj) for name, obj in globals().iteritems() if name.startswith('cmd_') and inspect.isfunction(obj)]
       
   187 
       
   188         # sort alphabetically
       
   189         cmd_objects.sort()
       
   190         
       
   191         # iterate through all cmd_* objects
       
   192         for cmd_func_name, cmd_func in cmd_objects :
       
   193             # remove cmd_ prefix
       
   194             cmd_name = cmd_func_name[4:]
       
   195 
       
   196             # inspect
       
   197             cmd_args, cmd_varargs, cmd_varkw, cmd_default = inspect.getargspec(cmd_func)
       
   198             cmd_doc = inspect.getdoc(cmd_func)
       
   199 
       
   200             # remove the "options" arg
       
   201             cmd_args = cmd_args[1:]
       
   202 
       
   203             # display
       
   204             print "\t%10s %-30s : %s" % (cmd_name, inspect.formatargspec(cmd_args, cmd_varargs, None, cmd_default), cmd_doc)
       
   205 
       
   206 def main (argv) :
       
   207     """
       
   208         Command-line main, with given argv
       
   209     """
       
   210 
       
   211     from optparse import OptionParser
       
   212     
       
   213     # define parser
       
   214     parser = OptionParser(
       
   215         usage           = "%prog [options] <command> [ ... ]",
       
   216         add_help_option = False,
       
   217     )
       
   218 
       
   219     # define command-line arguments
       
   220     parser.add_option('-h', "--help",           dest="help",            help="Show this help message and exit",     action="store_true")
       
   221     parser.add_option('-F', "--formatter",      dest="formatter_name",  help="LogFormatter to use",                 metavar="FMT",  type="choice", default="irssi",
       
   222         choices=[fmt_name for fmt_name in config.LOG_FORMATTERS.iterkeys()])
       
   223 
       
   224     parser.add_option('-I', "--index",          dest="index_path",      help="Index database path",                 metavar="PATH", default="logs/index")
       
   225     parser.add_option('-Z', "--timezone",       dest="tz_name",         help="Timezone for output",                 metavar="TZ",   default="UTC")
       
   226     parser.add_option('-f', "--force",          dest="force",           help="Force dangerous operation",           action="store_true")
       
   227     parser.add_option(      "--create",         dest="create",          help="Create index database",               action="store_true")
       
   228     parser.add_option(      "--skip-missing",   dest="skip_missing",    help="Skip missing logfiles",               action="store_true")
       
   229     parser.add_option(      "--quiet",          dest="quiet",           help="Supress status messages",             action="store_true")
       
   230 
       
   231     # parse
       
   232     options, args = parser.parse_args(argv[1:])
       
   233 
       
   234     # postprocess stuff
       
   235     options._parser = parser
       
   236     options.tz = pytz.timezone(options.tz_name)
       
   237     options.formatter = config.LOG_FORMATTERS[options.formatter_name](options.tz, "%H:%M:%S", None, None)
       
   238 
       
   239     # special-case --help
       
   240     if options.help :
       
   241         return cmd_help(options, *args)
       
   242     
       
   243     # must have at least the command argument
       
   244     if not args :
       
   245         raise CommandError("Missing command")
       
   246     
       
   247     # pop command
       
   248     command = args.pop(0)
       
   249     
       
   250     # get func
       
   251     func = globals().get('cmd_%s' % command)
       
   252     
       
   253     # unknown command?
       
   254     if not func :
       
   255         raise CommandError("Unknown command: %s" % command)
       
   256     
       
   257     # call
       
   258     func(options, *args)
       
   259 
       
   260 if __name__ == '__main__' :
       
   261     try :
       
   262         main(sys.argv)
       
   263         sys.exit(0)
       
   264 
       
   265     except CommandError, e :
       
   266         print e
       
   267         sys.exit(1)
       
   268