add utils.to/from_utc_timestamp functions, fix LogSearchIndex to store all LogLine attributes, add list() method to get LogLines for a given date, and improve scripts/search-index
authorTero Marttila <terom@fixme.fi>
Wed, 11 Feb 2009 00:33:21 +0200
changeset 89 2dc6de43f317
parent 88 0b8e2ba5f76f
child 90 275a675712f1
add utils.to/from_utc_timestamp functions, fix LogSearchIndex to store all LogLine attributes, add list() method to get LogLines for a given date, and improve scripts/search-index
log_search.py
scripts/search-index
utils.py
--- a/log_search.py	Tue Feb 10 23:59:56 2009 +0200
+++ b/log_search.py	Wed Feb 11 00:33:21 2009 +0200
@@ -7,7 +7,7 @@
 
 import HyperEstraier as hype
 
-import log_line
+import log_line, utils
 
 class LogSearchError (Exception) :
     """
@@ -30,13 +30,17 @@
         This uses Hyper Estraier to handle searching, whereby each log line is a document (yes, I have a powerful server).
 
         These log documents have the following attributes:
-            @uri            - channel/date/line
-            channel         - channel code
-            type            - the LogType id
-            timestamp       - UTC timestamp
-            source_nickname - source nickname
+            @uri                - channel/date/line
+            channel             - channel code
+            type                - the LogType id
+            timestamp           - UTC timestamp
+            source_nickname     - source nickname
+            source_username     - source username
+            source_hostname     - source hostname
+            source_chanflags    - source channel flags
+            target_nickname     - target nickname
 
-        Each document then has a single line of data, which is the log message itself
+        Each document then has a single line of data, which is the log data message
     """
 
     def __init__ (self, channels, path, mode='r') :
@@ -101,9 +105,6 @@
             # line date
             date = line.timestamp.date()
 
-            # convert to UTC timestamp
-            utc_timestamp = calendar.timegm(line.timestamp.utctimetuple())
-
             # ensure that it's not 1900
             assert date.year != 1900
 
@@ -117,18 +118,31 @@
             doc.add_attr('type',        str(line.type))
 
             # add UTC timestamp
-            doc.add_attr('timestamp',   str(utc_timestamp))
+            doc.add_attr('timestamp',   str(utils.to_utc_timestamp(line.timestamp)))
 
             # add source attribute?
             if line.source :
                 source_nickname, source_username, source_hostname, source_chanflags = line.source
 
-                # XXX: handle source_nickname is None
-                if not source_nickname is None :
-                    source_nickname = str(source_nickname)
+                if source_nickname :
+                    doc.add_attr('source_nickname', source_nickname.encode('utf8'))
+                
+                if source_username :
+                    doc.add_attr('source_username', source_username.encode('utf8'))
 
-                doc.add_attr('source_nickname', source_nickname)
+                if source_hostname :
+                    doc.add_attr('source_hostname', source_hostname.encode('utf8'))
+
+                if source_chanflags :
+                    doc.add_attr('source_chanflags', source_chanflags.encode('utf8'))
             
+            # add target attributes?
+            if line.target :
+                target_nickname = line.target
+
+                if target_nickname :
+                    doc.add_attr('target_nickname', target_nickname.encode('utf8'))
+
             # add data
             if line.data :
                 doc.add_text(line.data.encode('utf8'))
@@ -164,19 +178,27 @@
             # load the attributes/text
             channel         = self.channels.lookup(doc.attr('channel'))
             type            = int(doc.attr('type'))
-            timestamp       = datetime.datetime.fromtimestamp(int(doc.attr('timestamp')), pytz.utc)
-            source_nickname = doc.attr('source_nickname')
+            timestamp       = utils.from_utc_timestamp(int(doc.attr('timestamp')))
+
+            # source
+            source = (doc.attr('source_nickname'), doc.attr('source_username'), doc.attr('source_hostname'), doc.attr('source_chanflags'))
+
+            # target
+            target = doc.attr('target_nickname')
+            
+            # message text
             message         = doc.cat_texts().decode('utf8')
 
             # build+yield to as LogLine
-            yield log_line.LogLine(channel, None, type, timestamp, (source_nickname, None, None, None), None, message)
+            yield log_line.LogLine(channel, None, type, timestamp, source, target, message)
     
-    def search (self, options=None, channel=None, phrase=None, order=None, max=None, skip=None) :
+    def search (self, options=None, channel=None, attrs=None, phrase=None, order=None, max=None, skip=None) :
         """
             Search with flexible parameters
 
                 options     - bitmask of hype.Condition.*
                 channel     - LogChannel object
+                attrs       - raw attribute expressions
                 phrase      - the search query phrase
                 order       - order attribute expression
                 max         - number of results to return
@@ -192,8 +214,13 @@
         
         if channel :
             # add channel attribute
-            cond.add_attr("@channel STREQ %s" % (channel.id, ))
+            cond.add_attr("channel STREQ %s" % (channel.id, ))
         
+        if attrs :
+            # add attributes
+            for attr in attrs :
+                cond.add_attr(attr)
+
         if phrase :
             # add phrase
             cond.set_phrase(phrase)
@@ -229,8 +256,8 @@
             # given phrase
             phrase      = query,
 
-            # order by timestamp
-            order       = "@timestamp NUMD",
+            # order by timestamp, descending (backwards)
+            order       = "timestamp NUMD",
 
             # count/offset
             max         = count,
@@ -240,3 +267,29 @@
         # reverse
         return reversed(results)
 
+    def list (self, channel, date, count=None, skip=None) :
+        """
+            List all indexed log items for the given UTC date, in ascending timestamp order
+
+                channel     - LogChannel object
+                date        - the day to list; only its year/month/day fields are used
+                count       - number of results to return (passed to search as max)
+                skip        - result offset (passed through to search)
+        """
+
+        # first and last instant of the day (the day ends at 23:59:59.999999), as naive datetimes
+        dt_start = datetime.datetime(date.year, date.month, date.day, 0, 0, 0, 0)
+        dt_end   = datetime.datetime(date.year, date.month, date.day, 23, 59, 59, 999999)
+
+        return self.search(
+            # specific channel
+            channel     = channel,
+            # specific date range, as a pair of UTC timestamps
+            attrs       = [
+                "timestamp NUMBT %d %d" % (utils.to_utc_timestamp(dt_start), utils.to_utc_timestamp(dt_end))
+            ],
+            # oldest first
+            order       = "timestamp NUMA",
+            max         = count,
+            skip        = skip
+        )
--- a/scripts/search-index	Tue Feb 10 23:59:56 2009 +0200
+++ b/scripts/search-index	Wed Feb 11 00:33:21 2009 +0200
@@ -60,6 +60,32 @@
         if not options.quiet :
             print "OK: %d lines" % count
 
+
+def _parse_date (options, date_str, tz=None, fmt='%Y-%m-%d') :
+    """
+        Parse date_str using the strptime format fmt, and attach a timezone to the result.
+
+        A false tz (the default) means options.tz; any failure to parse is raised as a CommandError.
+    """
+
+    # resolved outside the try, so that only parse failures are reported as invalid dates
+    zone = tz or options.tz
+
+    try :
+        parsed = datetime.datetime.strptime(date_str, fmt)
+        return parsed.replace(tzinfo=zone)
+    except Exception, e :
+        raise CommandError("[ERROR] Invalid date: %s: %s" % (date_str, e))
+
+def _output_lines (options, lines) :
+    """
+        Print the given LogLines as plaintext with full timestamps, using only the text of each formatted pair
+    """
+
+    formatted = options.formatter.format_txt(lines, full_timestamps=True)
+    for _line, text in formatted :
+        print text
+
 class CommandError (Exception) :
     """
         Error with command-line arguments
@@ -87,22 +113,22 @@
     index, channel = _open_index_and_channel(options, channel_name, 'c' if options.create else 'a')
     
     # handle each date
-    for date_name in dates :
+    for date_str in dates :
+        # parse date
         try :
-            # parse date
-            date = datetime.datetime.strptime(date_name, '%Y-%m-%d').replace(tzinfo=channel.source.tz)
-
-        except Exception, e :
-            print "[ERROR] Invalid date: %s: %s" % (date_name, e)
-
+            date = _parse_date(options, date_str, channel.source.tz)
+        
+        # handle errors
+        except CommandError, e :
             if options.skip_missing :
-                continue
+                print "[ERROR] %s" % (date_name, e)
 
             else :
                 raise
         
-        # load
-        _load_channel_date(index, options, channel, date)
+        # otherwise, load
+        else :        
+            _load_channel_date(index, options, channel, date)
 
 def cmd_load_month (options, channel_name, *months) :
     """
@@ -113,15 +139,16 @@
     index, channel = _open_index_and_channel(options, channel_name, 'c' if options.create else 'a')
     
     # handle each date
-    for month_name in months :
+    for month_str in months :
+        # parse date
         try :
-            # parse date
-            month = datetime.datetime.strptime(month_name, '%Y-%m').replace(tzinfo=channel.source.tz)
-
-        except Exception, e :
-            print "[ERROR] Invalid date: %s: %s" % (month_name, e)
-
+            month = _parse_date(options, month_str, channel.source.tz, '%Y-%m')
+        
+        # handle errors
+        except CommandError, e :
+            # skip?
             if options.skip_missing :
+                print "[ERROR] %s" % (date_name, e)
                 continue
 
             else :
@@ -146,7 +173,7 @@
     """
     
     # sanity-check
-    if options.create_index :
+    if options.create :
         raise Exception("--create doesn't make sense for 'search'")
     
     # open index/channel
@@ -155,9 +182,31 @@
     # search
     lines = index.search_simple(channel, query)
     
-    # display as plaintext
-    for line in options.formatter.format_txt(lines) :
-        print line
+    # display
+    _output_lines(options, lines)
+
+def cmd_list (options, channel_name, *dates) :
+    """
+        List the indexed events of the given channel for each of the given dates (YYYY-MM-DD)
+    """
+
+    # sanity-check: this command only reads from the index
+    if options.create :
+        raise Exception("--create doesn't make sense for 'list'")
+
+    # open index/channel, using mode 'r'
+    index, channel = _open_index_and_channel(options, channel_name, 'r')
+
+    # ...for each date
+    for date_str in dates :
+        # parse using the default timezone (options.tz); an invalid date raises CommandError
+        date = _parse_date(options, date_str)
+
+        # look up the lines indexed for that day
+        lines = index.list(channel, date)
+
+        # display
+        _output_lines(options, lines)
 
 def cmd_help (options, *args) :
     """
--- a/utils.py	Tue Feb 10 23:59:56 2009 +0200
+++ b/utils.py	Wed Feb 11 00:33:21 2009 +0200
@@ -67,13 +67,27 @@
         """
             timestamp_str -> pytz.utc datetime.datetime
         """
-
-        return datetime.datetime.utcfromtimestamp(int(timestamp_str)).replace(tzinfo=pytz.utc)
+        
+        return from_utc_timestamp(int(timestamp_str))
     
     def build (self, dtz) :
         """
             pytz.utc datetime.datetime -> timestamp_str
         """
+        
+        return str(to_utc_timestamp(dtz))
 
-        return str(calendar.timegm(dtz.utctimetuple()))
+def from_utc_timestamp (timestamp) :
+    """
+        Converts a UNIX timestamp (seconds since the epoch) into a datetime.datetime with tzinfo=pytz.utc
+    """
 
+    return datetime.datetime.utcfromtimestamp(timestamp).replace(tzinfo=pytz.utc)
+
+def to_utc_timestamp (dt) :
+    """
+        Converts a datetime.datetime into a UNIX timestamp, by passing its UTC time tuple to calendar.timegm
+    """
+
+    return calendar.timegm(dt.utctimetuple())
+