|
1 #!/usr/bin/env python2.5 |
|
2 |
|
3 """ |
|
4 Tool for accessing the search index |
|
5 """ |
|
6 |
|
7 # XXX: fix path |
|
8 import sys; sys.path.insert(0, '.'); sys.path.insert(0, '..') |
|
9 |
|
10 import os, os.path, fcntl |
|
11 import datetime, pytz |
|
12 import optparse |
|
13 |
|
14 # configuration and the LogSearchIndex module |
|
15 from qmsk.irclogs import config, utils, log_search, channels |
|
16 |
|
def _open_index (options, open_mode) :
    """
        Build the LogSearchIndex over the configured log channels, using the database at options.index_path and the
        given open_mode string
    """

    index_path = options.index_path

    return log_search.LogSearchIndex(config.LOG_CHANNELS, index_path, open_mode)
|
23 |
|
24 |
|
def _open_index_and_channel (options, channel_name, open_mode) :
    """
        Returns an (index, channel) tuple: the LogSearchIndex opened with open_mode, and the channel that
        config.LOG_CHANNELS.lookup() gives for channel_name
    """

    # the index is opened first, the channel is only looked up once that has succeeded
    search_index = _open_index(options, open_mode)
    log_channel = config.LOG_CHANNELS.lookup(channel_name)

    return search_index, log_channel
|
38 |
|
39 def _iter_insert_stats (index, channel, lines) : |
|
40 """ |
|
41 Insert the given lines into the index. |
|
42 |
|
43 Assumes the lines will be in time-order, and yields a series of (date, count) tuples for every date that lines |
|
44 are inserted for |
|
45 """ |
|
46 |
|
47 # last date |
|
48 date = None |
|
49 |
|
50 # count |
|
51 count = 0 |
|
52 |
|
53 # iter lines |
|
54 for line in lines : |
|
55 # next day? |
|
56 if not date or line.timestamp.date() != date : |
|
57 if date : |
|
58 # yield stats |
|
59 yield date, count |
|
60 |
|
61 # reset count |
|
62 count = 0 |
|
63 |
|
64 # timestamp's date |
|
65 date = line.timestamp.date() |
|
66 |
|
67 # insert |
|
68 index.insert_line(channel, line) |
|
69 |
|
70 # count |
|
71 count += 1 |
|
72 |
|
73 # final count? |
|
74 if date and count : |
|
75 yield date, count |
|
76 |
|
77 def _insert_lines (index, options, channel, lines) : |
|
78 """ |
|
79 Insert the given lines into the index. |
|
80 |
|
81 Assumes the lines will be in time-order, and prints out as status messages the date and count for the inserted lines |
|
82 """ |
|
83 |
|
84 # iterate insert stats |
|
85 for date, count in _iter_insert_stats(index, channel, lines) : |
|
86 # output date header? |
|
87 if not options.quiet : |
|
88 print "%s: %s" % (date.strftime('%Y-%m-%d'), count), |
|
89 |
|
90 def _load_channel_date (index, options, channel, date) : |
|
91 """ |
|
92 Loads the logs for the given date from the channel's LogSource into the given LogSearchIndex |
|
93 """ |
|
94 |
|
95 if not options.quiet : |
|
96 print "Loading date for channel %s" % channel.id |
|
97 |
|
98 try : |
|
99 # load lines for date |
|
100 lines = channel.source.get_date(date) |
|
101 |
|
102 except Exception, e : |
|
103 if not options.skip_missing : |
|
104 raise |
|
105 |
|
106 if not options.quiet : |
|
107 print "\tSkipped: %s" % (e, ) |
|
108 |
|
109 else : |
|
110 # insert |
|
111 _insert_lines(index, options, channel, lines) |
|
112 |
|
113 def _parse_date (options, date_str, tz=None, fmt='%Y-%m-%d') : |
|
114 """ |
|
115 Parse the given datetime, using the given timezone(defaults to options.tz) and format |
|
116 """ |
|
117 |
|
118 # default tz |
|
119 if not tz : |
|
120 tz = options.timezone |
|
121 |
|
122 try : |
|
123 # parse |
|
124 return datetime.datetime.strptime(date_str, fmt).replace(tzinfo=tz) |
|
125 |
|
126 except Exception, e : |
|
127 raise CommandError("[ERROR] Invalid date: %s: %s" % (date_str, e)) |
|
128 |
|
129 def _output_lines (options, lines) : |
|
130 """ |
|
131 Display the formatted LogLines |
|
132 """ |
|
133 |
|
134 # display as plaintext |
|
135 for line, txt_data in options.formatter.format_txt(lines, full_timestamps=True) : |
|
136 print txt_data |
|
137 |
|
class CommandError (Exception) :
    """
        Raised for invalid command-line usage (bad arguments, unknown commands); the script entry point prints the
        message and exits with status 1
    """
|
144 |
|
def cmd_create (options) :
    """
        Creates a new index. With options.force set the index is opened in 'ctrunc' mode instead of 'c'.
    """

    if options.force :
        open_mode = 'ctrunc'

    else :
        open_mode = 'c'

    # opening the index in a create mode is all there is to it
    _open_index(options, open_mode)
|
155 |
|
156 def cmd_load (options, channel_name, *dates) : |
|
157 """ |
|
158 Loads the logs for a specific channel for the given dates (in terms of the channe logs' timezone) into the index |
|
159 """ |
|
160 |
|
161 # open index/channel |
|
162 index, channel = _open_index_and_channel(options, channel_name, 'c' if options.create else 'a') |
|
163 |
|
164 # handle each date |
|
165 for date_str in dates : |
|
166 # prase date |
|
167 try : |
|
168 date = _parse_date(options, date_str, channel.source.tz) |
|
169 |
|
170 # handle errors |
|
171 except CommandError, e : |
|
172 if options.skip_missing : |
|
173 print "[ERROR] %s" % (date_name, e) |
|
174 |
|
175 else : |
|
176 raise |
|
177 |
|
178 # otherwise, load |
|
179 else : |
|
180 _load_channel_date(index, options, channel, date) |
|
181 |
|
182 def cmd_load_month (options, channel_name, *months) : |
|
183 """ |
|
184 Loads the logs for a specific channel for the given months (in terms of the channel's timezone) into the index |
|
185 """ |
|
186 |
|
187 # open index/channel |
|
188 index, channel = _open_index_and_channel(options, channel_name, 'c' if options.create else 'a') |
|
189 |
|
190 # handle each date |
|
191 for month_str in months : |
|
192 # prase date |
|
193 try : |
|
194 month = _parse_date(options, month_str, channel.source.tz, '%Y-%m') |
|
195 |
|
196 # handle errors |
|
197 except CommandError, e : |
|
198 # skip? |
|
199 if options.skip_missing : |
|
200 if not options.quiet : |
|
201 print "[ERROR] %s" % (date_name, e) |
|
202 continue |
|
203 |
|
204 else : |
|
205 raise |
|
206 |
|
207 # get the set of days |
|
208 days = list(channel.source.get_month_days(month)) |
|
209 |
|
210 if not options.quiet : |
|
211 print "Loading %d days of logs:" % (len(days)) |
|
212 |
|
213 # load each day |
|
214 for date in days : |
|
215 # convert to datetime |
|
216 dt = datetime.datetime.combine(date, datetime.time(0)).replace(tzinfo=channel.source.tz) |
|
217 |
|
218 # load |
|
219 _load_channel_date(index, options, channel, dt) |
|
220 |
|
def cmd_search (options, channel_name, query) :
    """
        Search the index for events on a specific channel with the given query, and print the matching lines.

        Raises CommandError if --create was given, so that the mistake is reported as a usage error by the script
        entry point instead of as a traceback.
    """

    # sanity-check
    if options.create :
        raise CommandError("--create doesn't make sense for 'search'")

    # open index/channel, read-only
    index, channel = _open_index_and_channel(options, channel_name, 'r')

    # search
    lines = index.search_simple(channel, query)

    # display
    _output_lines(options, lines)
|
238 |
|
def cmd_list (options, channel_name, *dates) :
    """
        List the indexed events on a specific channel for each of the given dates.

        The date strings are parsed with _parse_date's defaults, i.e. YYYY-MM-DD in options.timezone.

        Raises CommandError if --create was given or a date cannot be parsed.
    """

    # sanity-check
    if options.create :
        raise CommandError("--create doesn't make sense for 'list'")

    # open index/channel, read-only
    index, channel = _open_index_and_channel(options, channel_name, 'r')

    # ...for each date
    for date_str in dates :
        # parse date
        date = _parse_date(options, date_str)

        # list
        lines = index.list(channel, date)

        # display
        _output_lines(options, lines)
|
261 |
|
262 def _autoload_reset (options, channels) : |
|
263 """ |
|
264 Reset old autoload state |
|
265 """ |
|
266 |
|
267 # warn |
|
268 if not options.quiet : |
|
269 print "[WARN] Resetting autoload state for: %s" % ', '.join(channel.id for channel in channels) |
|
270 |
|
271 # iter |
|
272 for channel in channels : |
|
273 # statefile path |
|
274 statefile_path = os.path.join(options.autoload_state_path, 'chan-%s' % channel.id) |
|
275 |
|
276 # is it present? |
|
277 if not os.path.exists(statefile_path) : |
|
278 if not options.quiet : |
|
279 print "[WARN] No statefile found at %s" % statefile_path |
|
280 |
|
281 else : |
|
282 if not options.quiet : |
|
283 print "\t%s: " % channel.id, |
|
284 |
|
285 # remove the statefile |
|
286 os.remove(statefile_path) |
|
287 |
|
288 if not options.quiet : |
|
289 print "OK" |
|
290 |
|
291 def cmd_autoload (options, *channel_names) : |
|
292 """ |
|
293 Automatically loads all channel logs that have not been indexed yet (by logfile mtime) |
|
294 """ |
|
295 |
|
296 # open index, nonblocking |
|
297 index = _open_index(options, 'c?' if options.create else 'a?') |
|
298 |
|
299 # default to all channels |
|
300 if not channel_names : |
|
301 channels = config.LOG_CHANNELS |
|
302 |
|
303 else : |
|
304 channels = [config.LOG_CHANNELS.lookup(channel_name) for channel_name in channel_names] |
|
305 |
|
306 # reset autoload state? |
|
307 if options.reset : |
|
308 _autoload_reset(options, channels) |
|
309 if not options.quiet : |
|
310 print |
|
311 |
|
312 # iterate channels |
|
313 for channel in channels : |
|
314 if not options.quiet : |
|
315 print "Channel %s:" % channel.id |
|
316 |
|
317 # no 'from' by default |
|
318 after = None |
|
319 |
|
320 # path to our state file |
|
321 statefile_path = os.path.join(options.autoload_state_path, 'chan-%s' % channel.id) |
|
322 statefile_tmppath = statefile_path + '.tmp' |
|
323 |
|
324 # does it exist? |
|
325 have_tmpfile = os.path.exists(statefile_tmppath) |
|
326 |
|
327 # do we have a tempfile from a previous crash? |
|
328 if have_tmpfile and not options.ignore_resume : |
|
329 # first, open it... |
|
330 statefile_tmp = open(statefile_tmppath, 'r+') |
|
331 |
|
332 # ... then lock it |
|
333 fcntl.lockf(statefile_tmp, fcntl.LOCK_EX | fcntl.LOCK_NB) |
|
334 |
|
335 # read after timestamp |
|
336 after_str = statefile_tmp.read().rstrip() |
|
337 |
|
338 if after_str : |
|
339 # parse timestamp |
|
340 after = utils.from_utc_timestamp(int(after_str)) |
|
341 |
|
342 if not options.quiet : |
|
343 print "\tContinuing earlier progress from %s" % after |
|
344 |
|
345 else : |
|
346 # ignore |
|
347 if not options.quiet : |
|
348 print "\t[WARN] Ignoring empty temporary statefile" |
|
349 |
|
350 else : |
|
351 # warn about old tmpfile that was ignored |
|
352 if have_tmpfile and not options.quiet : |
|
353 print "\t[WARN] Ignoring old tmpfile state" |
|
354 |
|
355 # open new tempfile |
|
356 statefile_tmp = open(statefile_tmppath, 'w') |
|
357 |
|
358 # lock |
|
359 fcntl.lockf(statefile_tmp, fcntl.LOCK_EX | fcntl.LOCK_NB) |
|
360 |
|
361 # override? |
|
362 if options.reload : |
|
363 # load all |
|
364 mtime = None |
|
365 |
|
366 if not options.quiet : |
|
367 print "\tForcing reload!" |
|
368 |
|
369 # stat for mtime |
|
370 else : |
|
371 # stat for mtime, None if unknown |
|
372 mtime = utils.mtime(statefile_path, ignore_missing=True) |
|
373 |
|
374 if mtime and not options.quiet : |
|
375 print "\tLast load time was %s" % mtime |
|
376 |
|
377 elif not options.quiet : |
|
378 print "\t[WARN] No previous load state! Loading full logs" |
|
379 |
|
380 # only after some specific date? |
|
381 if options.after : |
|
382 # use unless read from tempfile |
|
383 if not after : |
|
384 after = options.after |
|
385 |
|
386 if not options.quiet : |
|
387 print "\tOnly including dates from %s onwards" % after |
|
388 |
|
389 else : |
|
390 if not options.quiet : |
|
391 print "\t[WARN] Ignoring --from because we found a tempfile" |
|
392 |
|
393 # only up to some specific date? |
|
394 if options.until : |
|
395 until = options.until |
|
396 |
|
397 if not options.quiet : |
|
398 print "\tOnly including dates up to (and including) %s" % until |
|
399 else : |
|
400 # default to now |
|
401 until = None |
|
402 |
|
403 # get lines |
|
404 lines = channel.source.get_modified(mtime, after, until) |
|
405 |
|
406 # insert |
|
407 if not options.quiet : |
|
408 print "\tLoading and inserting..." |
|
409 print |
|
410 |
|
411 # iterate insert() per day to display info and update progress |
|
412 for date, count in _iter_insert_stats(index, channel, lines) : |
|
413 # output date header? |
|
414 if not options.quiet : |
|
415 print "\t%10s: %d" % (date.strftime('%Y-%m-%d'), count) |
|
416 |
|
417 # write temp state |
|
418 statefile_tmp.seek(0) |
|
419 statefile_tmp.write(str(utils.to_utc_timestamp(datetime.datetime.combine(date, datetime.time(0))))) |
|
420 statefile_tmp.flush() |
|
421 |
|
422 # write autoload state |
|
423 open(statefile_path, 'w').close() |
|
424 |
|
425 # close+delete tempfile |
|
426 statefile_tmp.close() |
|
427 os.remove(statefile_tmppath) |
|
428 |
|
429 if not options.quiet : |
|
430 print |
|
431 |
|
432 # done |
|
433 return |
|
434 |
|
435 def cmd_help (options, *args) : |
|
436 """ |
|
437 Help about commands |
|
438 """ |
|
439 |
|
440 import inspect |
|
441 |
|
442 # general help stuff |
|
443 options._parser.print_help() |
|
444 |
|
445 # specific command? |
|
446 if args : |
|
447 # the command name |
|
448 command, = args |
|
449 |
|
450 # XXX: display info about specific command |
|
451 xxx |
|
452 |
|
453 # general |
|
454 else : |
|
455 print |
|
456 print "Available commands:" |
|
457 |
|
458 # build list of all cmd_* objects |
|
459 cmd_objects = [(name, obj) for name, obj in globals().iteritems() if name.startswith('cmd_') and inspect.isfunction(obj)] |
|
460 |
|
461 # sort alphabetically |
|
462 cmd_objects.sort() |
|
463 |
|
464 # iterate through all cmd_* objects |
|
465 for cmd_func_name, cmd_func in cmd_objects : |
|
466 # remove cmd_ prefix |
|
467 cmd_name = cmd_func_name[4:] |
|
468 |
|
469 # inspect |
|
470 cmd_args, cmd_varargs, cmd_varkw, cmd_default = inspect.getargspec(cmd_func) |
|
471 cmd_doc = inspect.getdoc(cmd_func) |
|
472 |
|
473 # remove the "options" arg |
|
474 cmd_args = cmd_args[1:] |
|
475 |
|
476 # display |
|
477 print "\t%10s %-30s : %s" % (cmd_name, inspect.formatargspec(cmd_args, cmd_varargs, None, cmd_default), cmd_doc) |
|
478 |
|
class MyOption (optparse.Option) :
    """
        Our custom types for optparse.

        Adds the 'date' and 'timezone' option types, and the 'parse_date' action, which stores a 'date' value after
        placing it in the timezone held by the 'timezone' option value at the time the option is processed.
    """

    def check_date (option, opt, value) :
        """
            Parse a YYYY-MM-DD date into a naive datetime, raising OptionValueError for anything else
        """

        try :
            return datetime.datetime.strptime(value, '%Y-%m-%d')

        # trap -> OptionValueError
        except Exception :
            raise optparse.OptionValueError("option %s: invalid date value: %r" % (opt, value))

    def check_timezone (option, opt, value) :
        """
            Look up a timezone by name using pytz, raising OptionValueError if that fails
        """

        try :
            return pytz.timezone(value)

        # trap -> OptionValueError
        except Exception :
            raise optparse.OptionValueError("option %s: invalid timezone: %r" % (opt, value))

    def take_action (self, action, dest, opt, value, values, parser) :
        """
            Override take_action to handle 'parse_date': the (naive) datetime value is placed in values.timezone, and
            then stored like a normal 'store' action. All other actions are passed through unchanged.
        """

        if action == "parse_date" :
            # the timezone option value as it is right now, i.e. its default unless already seen on the command line
            tz = values.timezone

            # timezones offering localize() (as pytz timezones do) must be applied through it; handing such a
            # timezone to replace(tzinfo=...) does not select the correct UTC offset for many zones
            localize = getattr(tz, 'localize', None)

            if localize :
                value = localize(value)

            else :
                value = value.replace(tzinfo=tz)

            # store
            action = 'store'

        return optparse.Option.take_action(self, action, dest, opt, value, values, parser)

    # register the new types with their checkers
    TYPES = optparse.Option.TYPES + ('date', 'timezone')
    TYPE_CHECKER = optparse.Option.TYPE_CHECKER.copy()
    TYPE_CHECKER['date'] = check_date
    TYPE_CHECKER['timezone'] = check_timezone

    # register the new action as one that takes a typed value and stores it to a dest
    ACTIONS = optparse.Option.ACTIONS + ('parse_date', )
    STORE_ACTIONS = optparse.Option.STORE_ACTIONS + ('parse_date', )
    TYPED_ACTIONS = optparse.Option.TYPED_ACTIONS + ('parse_date', )
|
537 |
|
def main (argv) :
    """
        Command-line entry point: parses the options out of argv[1:], and calls the cmd_* function named by the first
        remaining argument with the options and the rest of the arguments.

        Raises CommandError if the command is missing or unknown.
    """

    parser = optparse.OptionParser(
        usage           = "%prog [options] <command> [ ... ]",
        add_help_option = False,
        option_class    = MyOption,
    )

    # options shared by all commands
    general_group = optparse.OptionGroup(parser, "General Options")
    general_group.add_option('-h', "--help",
        dest    = "help",
        help    = "Show this help message and exit",
        action  = "store_true",
    )
    general_group.add_option("--formatter",
        dest    = "formatter_name",
        help    = "LogFormatter to use",
        metavar = "FMT",
        type    = "choice",
        default = config.PREF_FORMATTER_DEFAULT.name,
        choices = list(config.LOG_FORMATTERS.iterkeys()),
    )
    general_group.add_option("--index",
        dest    = "index_path",
        help    = "Index database path",
        metavar = "PATH",
        default = config.SEARCH_INDEX_PATH,
    )
    general_group.add_option("--timezone",
        dest    = "timezone",
        help    = "Timezone for output",
        metavar = "TZ",
        type    = "timezone",
        default = pytz.utc,
    )
    general_group.add_option("--force",
        dest    = "force",
        help    = "Force dangerous operation",
        action  = "store_true",
    )
    general_group.add_option("--quiet",
        dest    = "quiet",
        help    = "Supress status messages",
        action  = "store_true",
    )
    parser.add_option_group(general_group)

    # options for the load commands
    load_group = optparse.OptionGroup(parser, "Load Options")
    load_group.add_option("--skip-missing",
        dest    = "skip_missing",
        help    = "Skip missing logfiles",
        action  = "store_true",
    )
    load_group.add_option("--create",
        dest    = "create",
        help    = "Create index database",
        action  = "store_true",
    )
    parser.add_option_group(load_group)

    # options for the autoload command
    autoload_group = optparse.OptionGroup(parser, "Autoload Options")
    autoload_group.add_option("--autoload-state",
        dest    = "autoload_state_path",
        help    = "Path to autoload state dir",
        metavar = "PATH",
        default = config.SEARCH_AUTOINDEX_PATH,
    )
    autoload_group.add_option("--from",
        dest    = "after",
        help    = "Only autoload logfiles from the given date on",
        metavar = "DATE",
        type    = "date",
        action  = "parse_date",
        default = None,
    )
    autoload_group.add_option("--until",
        dest    = "until",
        help    = "Only autoload logfiles up to (and including) the given date",
        metavar = "DATE",
        type    = "date",
        action  = "parse_date",
        default = None,
    )
    autoload_group.add_option("--reload",
        dest    = "reload",
        help    = "Force reload lines",
        action  = "store_true",
    )
    autoload_group.add_option("--reset",
        dest    = "reset",
        help    = "Reset old autload state",
        action  = "store_true",
    )
    autoload_group.add_option("--ignore-resume",
        dest    = "ignore_resume",
        help    = "Do not try and resume interrupted autoload",
        action  = "store_true",
    )
    parser.add_option_group(autoload_group)

    options, args = parser.parse_args(argv[1:])

    # extras that the commands expect to find on the options object
    options._parser = parser

    formatter_cls = config.LOG_FORMATTERS[options.formatter_name]
    options.formatter = formatter_cls(options.timezone, "%H:%M:%S", None, None)

    # --help wins over any command given
    if options.help :
        return cmd_help(options, *args)

    if not args :
        raise CommandError("Missing command")

    # the first argument names the command, the rest are its arguments
    command = args[0]
    command_args = args[1:]

    func = globals().get('cmd_%s' % command)

    if not func :
        raise CommandError("Unknown command: %s" % command)

    func(options, *command_args)
|
631 |
|
632 if __name__ == '__main__' : |
|
633 try : |
|
634 main(sys.argv) |
|
635 sys.exit(0) |
|
636 |
|
637 except CommandError, e : |
|
638 print e |
|
639 sys.exit(1) |
|
640 |