diff -r 0b8e2ba5f76f -r 2dc6de43f317 log_search.py --- a/log_search.py Tue Feb 10 23:59:56 2009 +0200 +++ b/log_search.py Wed Feb 11 00:33:21 2009 +0200 @@ -7,7 +7,7 @@ import HyperEstraier as hype -import log_line +import log_line, utils class LogSearchError (Exception) : """ @@ -30,13 +30,17 @@ This uses Hyper Estraier to handle searching, whereby each log line is a document (yes, I have a powerful server). These log documents have the following attributes: - @uri - channel/date/line - channel - channel code - type - the LogType id - timestamp - UTC timestamp - source_nickname - source nickname + @uri - channel/date/line + channel - channel code + type - the LogType id + timestamp - UTC timestamp + source_nickname - source nickname + source_username - source username + source_hostname - source hostname + source_chanflags - source channel flags + target_nickname - target nickname - Each document then has a single line of data, which is the log message itself + Each document then has a single line of data, which is the log data message """ def __init__ (self, channels, path, mode='r') : @@ -101,9 +105,6 @@ # line date date = line.timestamp.date() - # convert to UTC timestamp - utc_timestamp = calendar.timegm(line.timestamp.utctimetuple()) - # ensure that it's not 1900 assert date.year != 1900 @@ -117,18 +118,31 @@ doc.add_attr('type', str(line.type)) # add UTC timestamp - doc.add_attr('timestamp', str(utc_timestamp)) + doc.add_attr('timestamp', str(utils.to_utc_timestamp(line.timestamp))) # add source attribute? if line.source : source_nickname, source_username, source_hostname, source_chanflags = line.source - # XXX: handle source_nickname is None - if not source_nickname is None : - source_nickname = str(source_nickname) + if source_nickname : + doc.add_attr('source_nickname', source_nickname.encode('utf8')) + + if source_username : + doc.add_attr('source_username', source_username.encode('utf8')) - doc.add_attr('source_nickname', source_nickname) + if source_hostname : + doc.add_attr('source_hostname', source_hostname.encode('utf8')) + + if source_chanflags : + doc.add_attr('source_chanflags', source_chanflags.encode('utf8')) + # add target attributes? + if line.target : + target_nickname = line.target + + if target_nickname : + doc.add_attr('target_nickname', target_nickname.encode('utf8')) + # add data if line.data : doc.add_text(line.data.encode('utf8')) @@ -164,19 +178,27 @@ # load the attributes/text channel = self.channels.lookup(doc.attr('channel')) type = int(doc.attr('type')) - timestamp = datetime.datetime.fromtimestamp(int(doc.attr('timestamp')), pytz.utc) - source_nickname = doc.attr('source_nickname') + timestamp = utils.from_utc_timestamp(int(doc.attr('timestamp'))) + + # source + source = (doc.attr('source_nickname'), doc.attr('source_username'), doc.attr('source_hostname'), doc.attr('source_chanflags')) + + # target + target = doc.attr('target_nickname') + + # message text message = doc.cat_texts().decode('utf8') # build+yield to as LogLine - yield log_line.LogLine(channel, None, type, timestamp, (source_nickname, None, None, None), None, message) + yield log_line.LogLine(channel, None, type, timestamp, source, target, message) - def search (self, options=None, channel=None, phrase=None, order=None, max=None, skip=None) : + def search (self, options=None, channel=None, attrs=None, phrase=None, order=None, max=None, skip=None) : """ Search with flexible parameters options - bitmask of hype.Condition.* channel - LogChannel object + attrs - raw attribute expressions phrase - the search query phrase order - order attribute expression max - number of results to return @@ -192,8 +214,13 @@ if channel : # add channel attribute - cond.add_attr("@channel STREQ %s" % (channel.id, )) + cond.add_attr("channel STREQ %s" % (channel.id, )) + if attrs : + # add attributes + for attr in attrs : + cond.add_attr(attr) + if phrase : # add phrase cond.set_phrase(phrase) @@ -229,8 +256,8 @@ # given phrase phrase = query, - # order by timestamp - order = "@timestamp NUMD", + # order by timestamp, descending (backwards) + order = "timestamp NUMD", # count/offset max = count, @@ -240,3 +267,29 @@ # reverse return reversed(results) + def list (self, channel, date, count=None, skip=None) : + """ + List all indexed log items for the given UTC date + """ + + # start/end dates + dt_start = datetime.datetime(date.year, date.month, date.day, 0, 0, 0, 0) + dt_end = datetime.datetime(date.year, date.month, date.day, 23, 23, 59, 999999) + + # search + return self.search( + # specific channel + channel = channel, + + # specific date range + attrs = [ + "timestamp NUMBT %d %d" % (utils.to_utc_timestamp(dt_start), utils.to_utc_timestamp(dt_end)) + ], + + # order correctly + order = "timestamp NUMA", + + # max count/offset + max = count, + skip = skip + )