log_search.py
changeset 89 2dc6de43f317
parent 87 39915772f090
child 93 48fca00689e3
--- a/log_search.py	Tue Feb 10 23:59:56 2009 +0200
+++ b/log_search.py	Wed Feb 11 00:33:21 2009 +0200
@@ -7,7 +7,7 @@
 
 import HyperEstraier as hype
 
-import log_line
+import log_line, utils
 
 class LogSearchError (Exception) :
     """
@@ -30,13 +30,17 @@
         This uses Hyper Estraier to handle searching, whereby each log line is a document (yes, I have a powerful server).
 
         These log documents have the following attributes:
-            @uri            - channel/date/line
-            channel         - channel code
-            type            - the LogType id
-            timestamp       - UTC timestamp
-            source_nickname - source nickname
+            @uri                - channel/date/line
+            channel             - channel code
+            type                - the LogType id
+            timestamp           - UTC timestamp
+            source_nickname     - source nickname
+            source_username     - source username
+            source_hostname     - source hostname
+            source_chanflags    - source channel flags
+            target_nickname     - target nickname
 
-        Each document then has a single line of data, which is the log message itself
+        Each document then has a single line of data, which is the log data message
     """
 
     def __init__ (self, channels, path, mode='r') :
@@ -101,9 +105,6 @@
             # line date
             date = line.timestamp.date()
 
-            # convert to UTC timestamp
-            utc_timestamp = calendar.timegm(line.timestamp.utctimetuple())
-
             # ensure that it's not 1900
             assert date.year != 1900
 
@@ -117,18 +118,31 @@
             doc.add_attr('type',        str(line.type))
 
             # add UTC timestamp
-            doc.add_attr('timestamp',   str(utc_timestamp))
+            doc.add_attr('timestamp',   str(utils.to_utc_timestamp(line.timestamp)))
 
             # add source attribute?
             if line.source :
                 source_nickname, source_username, source_hostname, source_chanflags = line.source
 
-                # XXX: handle source_nickname is None
-                if not source_nickname is None :
-                    source_nickname = str(source_nickname)
+                if source_nickname :
+                    doc.add_attr('source_nickname', source_nickname.encode('utf8'))
+                
+                if source_username :
+                    doc.add_attr('source_username', source_username.encode('utf8'))
 
-                doc.add_attr('source_nickname', source_nickname)
+                if source_hostname :
+                    doc.add_attr('source_hostname', source_hostname.encode('utf8'))
+
+                if source_chanflags :
+                    doc.add_attr('source_chanflags', source_chanflags.encode('utf8'))
             
+            # add target attributes?
+            if line.target :
+                target_nickname = line.target
+
+                if target_nickname :
+                    doc.add_attr('target_nickname', target_nickname.encode('utf8'))
+
             # add data
             if line.data :
                 doc.add_text(line.data.encode('utf8'))
@@ -164,19 +178,27 @@
             # load the attributes/text
             channel         = self.channels.lookup(doc.attr('channel'))
             type            = int(doc.attr('type'))
-            timestamp       = datetime.datetime.fromtimestamp(int(doc.attr('timestamp')), pytz.utc)
-            source_nickname = doc.attr('source_nickname')
+            timestamp       = utils.from_utc_timestamp(int(doc.attr('timestamp')))
+
+            # source
+            source = (doc.attr('source_nickname'), doc.attr('source_username'), doc.attr('source_hostname'), doc.attr('source_chanflags'))
+
+            # target
+            target = doc.attr('target_nickname')
+            
+            # message text
             message         = doc.cat_texts().decode('utf8')
 
             # build+yield to as LogLine
-            yield log_line.LogLine(channel, None, type, timestamp, (source_nickname, None, None, None), None, message)
+            yield log_line.LogLine(channel, None, type, timestamp, source, target, message)
     
-    def search (self, options=None, channel=None, phrase=None, order=None, max=None, skip=None) :
+    def search (self, options=None, channel=None, attrs=None, phrase=None, order=None, max=None, skip=None) :
         """
             Search with flexible parameters
 
                 options     - bitmask of hype.Condition.*
                 channel     - LogChannel object
+                attrs       - raw attribute expressions
                 phrase      - the search query phrase
                 order       - order attribute expression
                 max         - number of results to return
@@ -192,8 +214,13 @@
         
         if channel :
             # add channel attribute
-            cond.add_attr("@channel STREQ %s" % (channel.id, ))
+            cond.add_attr("channel STREQ %s" % (channel.id, ))
         
+        if attrs :
+            # add attributes
+            for attr in attrs :
+                cond.add_attr(attr)
+
         if phrase :
             # add phrase
             cond.set_phrase(phrase)
@@ -229,8 +256,8 @@
             # given phrase
             phrase      = query,
 
-            # order by timestamp
-            order       = "@timestamp NUMD",
+            # order by timestamp, descending (backwards)
+            order       = "timestamp NUMD",
 
             # count/offset
             max         = count,
@@ -240,3 +267,29 @@
         # reverse
         return reversed(results)
 
+    def list (self, channel, date, count=None, skip=None) :
+        """
+            List all indexed log items for the given UTC date, in ascending timestamp order
+
+                channel     - LogChannel object to restrict the listing to
+                date        - the day to list, only its year/month/day are used
+                count       - number of results to return, passed to search() as max
+                skip        - result offset, passed to search() as skip
+        """
+
+        # bounds of the day: 00:00:00.000000 through 23:59:59.999999
+        dt_start = datetime.datetime(date.year, date.month, date.day, 0, 0, 0, 0)
+        dt_end   = datetime.datetime(date.year, date.month, date.day, 23, 59, 59, 999999)
+
+        return self.search(
+            channel     = channel,
+            # restrict to timestamps between the start and end of the day
+            attrs       = [
+                "timestamp NUMBT %d %d" % (utils.to_utc_timestamp(dt_start), utils.to_utc_timestamp(dt_end))
+            ],
+            # order correctly (numeric ascending)
+            order       = "timestamp NUMA",
+            # max count/offset
+            max         = count,
+            skip        = skip
+        )