# HG changeset patch # User Tero Marttila # Date 1234305201 -7200 # Node ID 2dc6de43f317d96caf226f707c10bf56e0d4e496 # Parent 0b8e2ba5f76fc990d41ab55c3048668efb23a421 add utils.to/from_utc_timestamp functions, fix LogSearchIndex to store all LogLine attributes, add list() method to get LogLines for a given date, and improve scripts/search-index diff -r 0b8e2ba5f76f -r 2dc6de43f317 log_search.py --- a/log_search.py Tue Feb 10 23:59:56 2009 +0200 +++ b/log_search.py Wed Feb 11 00:33:21 2009 +0200 @@ -7,7 +7,7 @@ import HyperEstraier as hype -import log_line +import log_line, utils class LogSearchError (Exception) : """ @@ -30,13 +30,17 @@ This uses Hyper Estraier to handle searching, whereby each log line is a document (yes, I have a powerful server). These log documents have the following attributes: - @uri - channel/date/line - channel - channel code - type - the LogType id - timestamp - UTC timestamp - source_nickname - source nickname + @uri - channel/date/line + channel - channel code + type - the LogType id + timestamp - UTC timestamp + source_nickname - source nickname + source_username - source username + source_hostname - source hostname + source_chanflags - source channel flags + target_nickname - target nickname - Each document then has a single line of data, which is the log message itself + Each document then has a single line of data, which is the log data message """ def __init__ (self, channels, path, mode='r') : @@ -101,9 +105,6 @@ # line date date = line.timestamp.date() - # convert to UTC timestamp - utc_timestamp = calendar.timegm(line.timestamp.utctimetuple()) - # ensure that it's not 1900 assert date.year != 1900 @@ -117,18 +118,31 @@ doc.add_attr('type', str(line.type)) # add UTC timestamp - doc.add_attr('timestamp', str(utc_timestamp)) + doc.add_attr('timestamp', str(utils.to_utc_timestamp(line.timestamp))) # add source attribute? if line.source : source_nickname, source_username, source_hostname, source_chanflags = line.source - # XXX: handle source_nickname is None - if not source_nickname is None : - source_nickname = str(source_nickname) + if source_nickname : + doc.add_attr('source_nickname', source_nickname.encode('utf8')) + + if source_username : + doc.add_attr('source_username', source_username.encode('utf8')) - doc.add_attr('source_nickname', source_nickname) + if source_hostname : + doc.add_attr('source_hostname', source_hostname.encode('utf8')) + + if source_chanflags : + doc.add_attr('source_chanflags', source_chanflags.encode('utf8')) + # add target attributes? + if line.target : + target_nickname = line.target + + if target_nickname : + doc.add_attr('target_nickname', target_nickname.encode('utf8')) + # add data if line.data : doc.add_text(line.data.encode('utf8')) @@ -164,19 +178,27 @@ # load the attributes/text channel = self.channels.lookup(doc.attr('channel')) type = int(doc.attr('type')) - timestamp = datetime.datetime.fromtimestamp(int(doc.attr('timestamp')), pytz.utc) - source_nickname = doc.attr('source_nickname') + timestamp = utils.from_utc_timestamp(int(doc.attr('timestamp'))) + + # source + source = (doc.attr('source_nickname'), doc.attr('source_username'), doc.attr('source_hostname'), doc.attr('source_chanflags')) + + # target + target = doc.attr('target_nickname') + + # message text message = doc.cat_texts().decode('utf8') # build+yield to as LogLine - yield log_line.LogLine(channel, None, type, timestamp, (source_nickname, None, None, None), None, message) + yield log_line.LogLine(channel, None, type, timestamp, source, target, message) - def search (self, options=None, channel=None, phrase=None, order=None, max=None, skip=None) : + def search (self, options=None, channel=None, attrs=None, phrase=None, order=None, max=None, skip=None) : """ Search with flexible parameters options - bitmask of hype.Condition.* channel - LogChannel object + attrs - raw attribute expressions phrase - the search query phrase order - order attribute expression max - number of results to return @@ -192,8 +214,13 @@ if channel : # add channel attribute - cond.add_attr("@channel STREQ %s" % (channel.id, )) + cond.add_attr("channel STREQ %s" % (channel.id, )) + if attrs : + # add attributes + for attr in attrs : + cond.add_attr(attr) + if phrase : # add phrase cond.set_phrase(phrase) @@ -229,8 +256,8 @@ # given phrase phrase = query, - # order by timestamp - order = "@timestamp NUMD", + # order by timestamp, descending (backwards) + order = "timestamp NUMD", # count/offset max = count, @@ -240,3 +267,29 @@ # reverse return reversed(results) + def list (self, channel, date, count=None, skip=None) : + """ + List all indexed log items for the given UTC date + """ + + # start/end dates + dt_start = datetime.datetime(date.year, date.month, date.day, 0, 0, 0, 0) + dt_end = datetime.datetime(date.year, date.month, date.day, 23, 23, 59, 999999) + + # search + return self.search( + # specific channel + channel = channel, + + # specific date range + attrs = [ + "timestamp NUMBT %d %d" % (utils.to_utc_timestamp(dt_start), utils.to_utc_timestamp(dt_end)) + ], + + # order correctly + order = "timestamp NUMA", + + # max count/offset + max = count, + skip = skip + ) diff -r 0b8e2ba5f76f -r 2dc6de43f317 scripts/search-index --- a/scripts/search-index Tue Feb 10 23:59:56 2009 +0200 +++ b/scripts/search-index Wed Feb 11 00:33:21 2009 +0200 @@ -60,6 +60,32 @@ if not options.quiet : print "OK: %d lines" % count + +def _parse_date (options, date_str, tz=None, fmt='%Y-%m-%d') : + """ + Parse the given datetime, using the given timezone(defaults to options.tz) and format + """ + + # default tz + if not tz : + tz = options.tz + + try : + # parse + return datetime.datetime.strptime(date_str, fmt).replace(tzinfo=tz) + + except Exception, e : + raise CommandError("[ERROR] Invalid date: %s: %s" % (date_str, e)) + +def _output_lines (options, lines) : + """ + Display the formatted LogLines + """ + + # display as plaintext + for line, txt_data in options.formatter.format_txt(lines, full_timestamps=True) : + print txt_data + class CommandError (Exception) : """ Error with command-line arguments @@ -87,22 +113,22 @@ index, channel = _open_index_and_channel(options, channel_name, 'c' if options.create else 'a') # handle each date - for date_name in dates : + for date_str in dates : + # prase date try : - # parse date - date = datetime.datetime.strptime(date_name, '%Y-%m-%d').replace(tzinfo=channel.source.tz) - - except Exception, e : - print "[ERROR] Invalid date: %s: %s" % (date_name, e) - + date = _parse_date(options, date_str, channel.source.tz) + + # handle errors + except CommandError, e : if options.skip_missing : - continue + print "[ERROR] %s" % (date_name, e) else : raise - # load - _load_channel_date(index, options, channel, date) + # otherwise, load + else : + _load_channel_date(index, options, channel, date) def cmd_load_month (options, channel_name, *months) : """ @@ -113,15 +139,16 @@ index, channel = _open_index_and_channel(options, channel_name, 'c' if options.create else 'a') # handle each date - for month_name in months : + for month_str in months : + # prase date try : - # parse date - month = datetime.datetime.strptime(month_name, '%Y-%m').replace(tzinfo=channel.source.tz) - - except Exception, e : - print "[ERROR] Invalid date: %s: %s" % (month_name, e) - + month = _parse_date(options, month_str, channel.source.tz, '%Y-%m') + + # handle errors + except CommandError, e : + # skip? if options.skip_missing : + print "[ERROR] %s" % (date_name, e) continue else : @@ -146,7 +173,7 @@ """ # sanity-check - if options.create_index : + if options.create : raise Exception("--create doesn't make sense for 'search'") # open index/channel @@ -155,9 +182,31 @@ # search lines = index.search_simple(channel, query) - # display as plaintext - for line in options.formatter.format_txt(lines) : - print line + # display + _output_lines(options, lines) + +def cmd_list (options, channel_name, *dates) : + """ + List the indexed events for a specific date + """ + + # sanity-check + if options.create : + raise Exception("--create doesn't make sense for 'search'") + + # open index/channel + index, channel = _open_index_and_channel(options, channel_name, 'r') + + # ...for each date + for date_str in dates : + # parse date + date = _parse_date(options, date_str) + + # list + lines = index.list(channel, date) + + # display + _output_lines(options, lines) def cmd_help (options, *args) : """ diff -r 0b8e2ba5f76f -r 2dc6de43f317 utils.py --- a/utils.py Tue Feb 10 23:59:56 2009 +0200 +++ b/utils.py Wed Feb 11 00:33:21 2009 +0200 @@ -67,13 +67,27 @@ """ timestamp_str -> pytz.utc datetime.datetime """ - - return datetime.datetime.utcfromtimestamp(int(timestamp_str)).replace(tzinfo=pytz.utc) + + return from_utc_timestamp(int(timestamp_str)) def build (self, dtz) : """ pytz.utc datetime.datetime -> timestamp_str """ + + return str(to_utc_timestamp(dtz)) - return str(calendar.timegm(dtz.utctimetuple())) +def from_utc_timestamp (timestamp) : + """ + Converts a UNIX timestamp into a datetime.datetime + """ + return datetime.datetime.utcfromtimestamp(timestamp).replace(tzinfo=pytz.utc) + +def to_utc_timestamp (dt) : + """ + Converts a datetime.datetime into a UNIX timestamp + """ + + return calendar.timegm(dt.utctimetuple()) +