add utils.to/from_utc_timestamp functions, fix LogSearchIndex to store all LogLine attributes, add list() method to get LogLines for a given date, and improve scripts/search-index
--- a/log_search.py Tue Feb 10 23:59:56 2009 +0200
+++ b/log_search.py Wed Feb 11 00:33:21 2009 +0200
@@ -7,7 +7,7 @@
import HyperEstraier as hype
-import log_line
+import log_line, utils
class LogSearchError (Exception) :
"""
@@ -30,13 +30,17 @@
This uses Hyper Estraier to handle searching, whereby each log line is a document (yes, I have a powerful server).
These log documents have the following attributes:
- @uri - channel/date/line
- channel - channel code
- type - the LogType id
- timestamp - UTC timestamp
- source_nickname - source nickname
+ @uri - channel/date/line
+ channel - channel code
+ type - the LogType id
+ timestamp - UTC timestamp
+ source_nickname - source nickname
+ source_username - source username
+ source_hostname - source hostname
+ source_chanflags - source channel flags
+ target_nickname - target nickname
- Each document then has a single line of data, which is the log message itself
+ Each document then has a single line of data, which is the log data message
"""
def __init__ (self, channels, path, mode='r') :
@@ -101,9 +105,6 @@
# line date
date = line.timestamp.date()
- # convert to UTC timestamp
- utc_timestamp = calendar.timegm(line.timestamp.utctimetuple())
-
# ensure that it's not 1900
assert date.year != 1900
@@ -117,18 +118,31 @@
doc.add_attr('type', str(line.type))
# add UTC timestamp
- doc.add_attr('timestamp', str(utc_timestamp))
+ doc.add_attr('timestamp', str(utils.to_utc_timestamp(line.timestamp)))
# add source attribute?
if line.source :
source_nickname, source_username, source_hostname, source_chanflags = line.source
- # XXX: handle source_nickname is None
- if not source_nickname is None :
- source_nickname = str(source_nickname)
+ if source_nickname :
+ doc.add_attr('source_nickname', source_nickname.encode('utf8'))
+
+ if source_username :
+ doc.add_attr('source_username', source_username.encode('utf8'))
- doc.add_attr('source_nickname', source_nickname)
+ if source_hostname :
+ doc.add_attr('source_hostname', source_hostname.encode('utf8'))
+
+ if source_chanflags :
+ doc.add_attr('source_chanflags', source_chanflags.encode('utf8'))
+ # add target attributes?
+ if line.target :
+ target_nickname = line.target
+
+ if target_nickname :
+ doc.add_attr('target_nickname', target_nickname.encode('utf8'))
+
# add data
if line.data :
doc.add_text(line.data.encode('utf8'))
@@ -164,19 +178,27 @@
# load the attributes/text
channel = self.channels.lookup(doc.attr('channel'))
type = int(doc.attr('type'))
- timestamp = datetime.datetime.fromtimestamp(int(doc.attr('timestamp')), pytz.utc)
- source_nickname = doc.attr('source_nickname')
+ timestamp = utils.from_utc_timestamp(int(doc.attr('timestamp')))
+
+ # source
+ source = (doc.attr('source_nickname'), doc.attr('source_username'), doc.attr('source_hostname'), doc.attr('source_chanflags'))
+
+ # target
+ target = doc.attr('target_nickname')
+
+ # message text
message = doc.cat_texts().decode('utf8')
# build+yield to as LogLine
- yield log_line.LogLine(channel, None, type, timestamp, (source_nickname, None, None, None), None, message)
+ yield log_line.LogLine(channel, None, type, timestamp, source, target, message)
- def search (self, options=None, channel=None, phrase=None, order=None, max=None, skip=None) :
+ def search (self, options=None, channel=None, attrs=None, phrase=None, order=None, max=None, skip=None) :
"""
Search with flexible parameters
options - bitmask of hype.Condition.*
channel - LogChannel object
+ attrs - raw attribute expressions
phrase - the search query phrase
order - order attribute expression
max - number of results to return
@@ -192,8 +214,13 @@
if channel :
# add channel attribute
- cond.add_attr("@channel STREQ %s" % (channel.id, ))
+ cond.add_attr("channel STREQ %s" % (channel.id, ))
+ if attrs :
+ # add attributes
+ for attr in attrs :
+ cond.add_attr(attr)
+
if phrase :
# add phrase
cond.set_phrase(phrase)
@@ -229,8 +256,8 @@
# given phrase
phrase = query,
- # order by timestamp
- order = "@timestamp NUMD",
+ # order by timestamp, descending (backwards)
+ order = "timestamp NUMD",
# count/offset
max = count,
@@ -240,3 +267,29 @@
# reverse
return reversed(results)
+    def list (self, channel, date, count=None, skip=None) :
+        """
+            List all indexed log items for the given UTC date, in ascending timestamp order
+
+                channel     - LogChannel object to list lines for
+                date        - the date to list; only its year/month/day are used
+                count       - maximum number of results, passed to search() as max
+                skip        - result offset, passed to search() as skip
+
+            Returns whatever search() returns for the resulting condition
+        """
+
+        # first and last instant of the day (00:00:00.000000 - 23:59:59.999999)
+        # these are naive datetimes, which utils.to_utc_timestamp reads as UTC wall-clock time
+        dt_start = datetime.datetime(date.year, date.month, date.day, 0, 0, 0, 0)
+        dt_end = datetime.datetime(date.year, date.month, date.day, 23, 59, 59, 999999)
+
+        # search
+        return self.search(
+            # specific channel
+            channel     = channel,
+
+            # only timestamps between the start and end of the day
+            attrs       = [
+                "timestamp NUMBT %d %d" % (utils.to_utc_timestamp(dt_start), utils.to_utc_timestamp(dt_end))
+            ],
+
+            # order by timestamp, ascending (forwards)
+            order       = "timestamp NUMA",
+
+            # max count/offset
+            max         = count,
+            skip        = skip
+        )
--- a/scripts/search-index Tue Feb 10 23:59:56 2009 +0200
+++ b/scripts/search-index Wed Feb 11 00:33:21 2009 +0200
@@ -60,6 +60,32 @@
if not options.quiet :
print "OK: %d lines" % count
+
+def _parse_date (options, date_str, tz=None, fmt='%Y-%m-%d') :
+    """
+        Turn date_str into a datetime.datetime carrying the given tzinfo
+
+            options     - command-line options; options.tz is the fallback timezone
+            date_str    - the text to parse
+            tz          - tzinfo to attach; a false value means options.tz
+            fmt         - strptime format that date_str must match
+
+        Any failure while parsing is re-raised as a CommandError
+    """
+
+    # fall back to the configured timezone
+    zone = tz if tz else options.tz
+
+    try :
+        parsed = datetime.datetime.strptime(date_str, fmt)
+
+        # attach the timezone without shifting the wall-clock value
+        return parsed.replace(tzinfo=zone)
+
+    except Exception, err :
+        raise CommandError("[ERROR] Invalid date: %s: %s" % (date_str, err))
+
+def _output_lines (options, lines) :
+    """
+        Print the given LogLines as plaintext, using options.formatter with full timestamps
+    """
+
+    formatted = options.formatter.format_txt(lines, full_timestamps=True)
+
+    # format_txt yields (line, text) pairs; only the text is printed
+    for _line, text in formatted :
+        print text
+
class CommandError (Exception) :
"""
Error with command-line arguments
@@ -87,22 +113,22 @@
index, channel = _open_index_and_channel(options, channel_name, 'c' if options.create else 'a')
# handle each date
- for date_name in dates :
+ for date_str in dates :
+ # parse date
try :
- # parse date
- date = datetime.datetime.strptime(date_name, '%Y-%m-%d').replace(tzinfo=channel.source.tz)
-
- except Exception, e :
- print "[ERROR] Invalid date: %s: %s" % (date_name, e)
-
+ date = _parse_date(options, date_str, channel.source.tz)
+
+ # handle errors
+ except CommandError, e :
if options.skip_missing :
- continue
+ # the CommandError message is already complete ("[ERROR] Invalid date: ..."), so print it as-is
+ print e
else :
raise
- # load
- _load_channel_date(index, options, channel, date)
+ # otherwise, load
+ else :
+ _load_channel_date(index, options, channel, date)
def cmd_load_month (options, channel_name, *months) :
"""
@@ -113,15 +139,16 @@
index, channel = _open_index_and_channel(options, channel_name, 'c' if options.create else 'a')
# handle each date
- for month_name in months :
+ for month_str in months :
+ # parse date
try :
- # parse date
- month = datetime.datetime.strptime(month_name, '%Y-%m').replace(tzinfo=channel.source.tz)
-
- except Exception, e :
- print "[ERROR] Invalid date: %s: %s" % (month_name, e)
-
+ month = _parse_date(options, month_str, channel.source.tz, '%Y-%m')
+
+ # handle errors
+ except CommandError, e :
+ # skip?
if options.skip_missing :
+ # the CommandError message is already complete ("[ERROR] Invalid date: ..."), so print it as-is
+ print e
continue
else :
@@ -146,7 +173,7 @@
"""
# sanity-check
- if options.create_index :
+ if options.create :
raise Exception("--create doesn't make sense for 'search'")
# open index/channel
@@ -155,9 +182,31 @@
# search
lines = index.search_simple(channel, query)
- # display as plaintext
- for line in options.formatter.format_txt(lines) :
- print line
+ # display
+ _output_lines(options, lines)
+
+def cmd_list (options, channel_name, *dates) :
+    """
+        List the indexed events for each of the given dates
+
+            channel_name    - name of the channel whose index is opened read-only
+            dates           - date strings in the default _parse_date format (%Y-%m-%d)
+    """
+
+    # sanity-check
+    if options.create :
+        raise Exception("--create doesn't make sense for 'list'")
+
+    # open index/channel
+    index, channel = _open_index_and_channel(options, channel_name, 'r')
+
+    # ...for each date
+    for date_str in dates :
+        # parse date; no timezone is passed, so _parse_date falls back to options.tz
+        date = _parse_date(options, date_str)
+
+        # list
+        lines = index.list(channel, date)
+
+        # display
+        _output_lines(options, lines)
def cmd_help (options, *args) :
"""
--- a/utils.py Tue Feb 10 23:59:56 2009 +0200
+++ b/utils.py Wed Feb 11 00:33:21 2009 +0200
@@ -67,13 +67,27 @@
"""
timestamp_str -> pytz.utc datetime.datetime
"""
-
- return datetime.datetime.utcfromtimestamp(int(timestamp_str)).replace(tzinfo=pytz.utc)
+
+ return from_utc_timestamp(int(timestamp_str))
def build (self, dtz) :
"""
pytz.utc datetime.datetime -> timestamp_str
"""
+
+ return str(to_utc_timestamp(dtz))
- return str(calendar.timegm(dtz.utctimetuple()))
+def from_utc_timestamp (timestamp) :
+    """
+        Build a datetime.datetime with tzinfo=pytz.utc from a UNIX timestamp
+    """
+
+    # utcfromtimestamp gives a naive datetime, so the UTC tzinfo is attached afterwards
+    naive_utc = datetime.datetime.utcfromtimestamp(timestamp)
+
+    return naive_utc.replace(tzinfo=pytz.utc)
+
+def to_utc_timestamp (dt) :
+    """
+        Turn a datetime.datetime into a UNIX timestamp, going through its UTC time tuple
+    """
+
+    utc_fields = dt.utctimetuple()
+
+    return calendar.timegm(utc_fields)
+