scripts/search-index.py
changeset 82 afd3120ec71e
parent 68 8157c41b3236
child 83 a34e9f56ddda
equal deleted inserted replaced
81:745032a57803 82:afd3120ec71e
       
     1 """
       
     2     Tool for accessing the search index
       
     3 """
       
     4 
       
     5 # XXX: fix path
       
     6 import sys; sys.path.insert(0, '.'); sys.path.insert(0, '..')
       
     7 
       
     8 import datetime, pytz
       
     9 
       
    10 # configuration and the LogSearchIndex module
       
    11 import config, log_search, channels
       
    12 
       
    13 def _open_index_and_channel (options, channel_name, open_mode) :
       
    14     """
       
    15         Opens+returns a LogSearchIndex and a LogChannel
       
    16     """
       
    17     # open the LogSearchIndex
       
    18     index = log_search.LogSearchIndex(options.index_path, open_mode)
       
    19 
       
    20     # open the channel
       
    21     channel = config.LOG_CHANNELS.lookup(channel_name)
       
    22     
       
    23     # return
       
    24     return index, channel
       
    25 
       
    26 def _load_channel_date (index, options, channel, date) :
       
    27     """
       
    28         Loads the logs for the given date from the channel's LogSource into the given LogSearchIndex
       
    29     """
       
    30 
       
    31     if not options.quiet :
       
    32         print "%s %s..." % (channel.id, date.strftime(channel.source.filename_fmt)),
       
    33         
       
    34     try :
       
    35         # load lines for date
       
    36         lines = channel.source.get_date(date)
       
    37     
       
    38     except Exception, e :
       
    39         if not options.skip_missing :
       
    40             raise
       
    41             
       
    42         if not options.quiet :
       
    43             print "Skipped: %s" % (e, )
       
    44     
       
    45     else :
       
    46         # insert -> count
       
    47         count = index.insert(channel, lines)
       
    48 
       
    49         if not options.quiet :
       
    50             print "OK: %d lines" % count
       
    51 
       
    52 def cmd_load (options, channel_name, *dates) :
       
    53     """
       
    54         Loads the logs for a specific channel for the given dates (in terms of the channe logs' timezone) into the index
       
    55     """
       
    56 
       
    57     # open index/channel
       
    58     index, channel = _open_index_and_channel(options, channel_name, '*' if options.create_index else 'a')
       
    59     
       
    60     # handle each date
       
    61     for date_name in dates :
       
    62         try :
       
    63             # parse date
       
    64             date = datetime.datetime.strptime(date_name, '%Y-%m-%d').replace(tzinfo=channel.source.tz)
       
    65 
       
    66         except Exception, e :
       
    67             print "[ERROR] Invalid date: %s: %s" % (date_name, e)
       
    68 
       
    69             if options.skip_missing :
       
    70                 continue
       
    71 
       
    72             else :
       
    73                 raise
       
    74         
       
    75         # load
       
    76         _load_channel_date(index, options, channel, date)
       
    77 
       
    78 def cmd_load_month (options, channel_name, *months) :
       
    79     """
       
    80         Loads the logs for a specific channel for the given months (in terms of the channel's timezone) into the index
       
    81     """
       
    82 
       
    83     # open index/channel
       
    84     index, channel = _open_index_and_channel(options, channel_name, '*' if options.create_index else 'a')
       
    85     
       
    86     # handle each date
       
    87     for month_name in months :
       
    88         try :
       
    89             # parse date
       
    90             month = datetime.datetime.strptime(month_name, '%Y-%m').replace(tzinfo=channel.source.tz)
       
    91 
       
    92         except Exception, e :
       
    93             print "[ERROR] Invalid date: %s: %s" % (month_name, e)
       
    94 
       
    95             if options.skip_missing :
       
    96                 continue
       
    97 
       
    98             else :
       
    99                 raise
       
   100         
       
   101         # get the set of days
       
   102         days = channel.source.get_month_days(month)
       
   103 
       
   104         print "Loading %d days of logs:" % (len(days))
       
   105 
       
   106         # load each day
       
   107         for date in days :
       
   108             # convert to datetime
       
   109             dt = datetime.datetime.combine(date, datetime.time(0)).replace(tzinfo=channel.source.tz)
       
   110             
       
   111             # load
       
   112             _load_channel_date(index, options, channel, dt)
       
   113 
       
   114 def cmd_search (options, channel_name, query) :
       
   115     """
       
   116         Search the index for events on a specific channel with the given query
       
   117     """
       
   118     
       
   119     # sanity-check
       
   120     if options.create_index :
       
   121         raise Exception("--create doesn't make sense for 'search'")
       
   122     
       
   123     # open index/channel
       
   124     index, channel = _open_index_and_channel(options, channel_name, 'r')
       
   125     
       
   126     # search
       
   127     lines = index.search_simple(channel, query)
       
   128     
       
   129     # display as plaintext
       
   130     for line in options.formatter.format_txt(lines) :
       
   131         print line
       
   132 
       
   133 if __name__ == '__main__' :
       
   134     from optparse import OptionParser
       
   135     
       
   136     # define parser
       
   137     parser = OptionParser(
       
   138         usage           = "%prog [options] <command> [ ... ]",
       
   139         add_help_option = True,
       
   140     )
       
   141 
       
   142     # define command-line arguments
       
   143     parser.add_option("-I", "--index", dest="index_path", help="Index database path", metavar="PATH", default="logs/index")
       
   144     parser.add_option("--create", dest="create_index", action="store_true", help="Create index database")
       
   145     parser.add_option("-f", "--formatter", dest="formatter_name", help="LogFormatter to use", default="irssi")
       
   146     parser.add_option("-z", "--timezone", dest="tz_name", help="Timezone for output", metavar="TZ", default="UTC")
       
   147     parser.add_option("--skip-missing", dest="skip_missing", action="store_true", help="Skip missing logfiles")
       
   148     parser.add_option("--quiet", dest="quiet", action="store_true", help="Supress status messages")
       
   149 
       
   150     # parse
       
   151     options, args = parser.parse_args()
       
   152 
       
   153     # postprocess stuff
       
   154     options.tz = pytz.timezone(options.tz_name)
       
   155     options.formatter = config.LOG_FORMATTERS[options.formatter_name](options.tz, "%H:%M:%S", None, None)
       
   156     
       
   157     # pop command
       
   158     if not args :
       
   159         raise Exception("Missing command")
       
   160 
       
   161     command = args.pop(0)
       
   162 
       
   163     # inspect
       
   164     func = globals()['cmd_%s' % command]
       
   165     
       
   166     # call
       
   167     func(options, *args)
       
   168 
       
   169