log_search.py
changeset 87 39915772f090
parent 74 1ab95857d584
child 89 2dc6de43f317
--- a/log_search.py	Tue Feb 10 23:00:11 2009 +0200
+++ b/log_search.py	Tue Feb 10 23:59:37 2009 +0200
@@ -3,6 +3,7 @@
 """
 
 import datetime, calendar, pytz
+import os.path
 
 import HyperEstraier as hype
 
@@ -29,29 +30,42 @@
         This uses Hyper Estraier to handle searching, whereby each log line is a document (yes, I have a powerful server).
 
         These log documents have the following attributes:
-            @uri        - channel/date/line
-            @channel    - channel id
-            @type       - the LogType id
-            @timestamp  - UTC timestamp
-            @source     - nickname
+            @uri            - channel/date/line
+            channel         - channel code
+            type            - the LogType id
+            timestamp       - UTC timestamp
+            source_nickname - source nickname
 
         Each document then has a single line of data, which is the log message itself
     """
 
-    def __init__ (self, path, mode='r') :
+    def __init__ (self, channels, path, mode='r') :
         """
-            Open the database, with the given mode:
+            Open the database at the given path, with the given mode:
                 r       - read-only
-                w       - read-write, create if not exists
-                a       - read-write, do not create
-                *       - read-write, truncate and create new
+                w       - write, create if not exists
+                a       - write, error if not exists
+                c       - write, create, error if exists
+                *       - write, create, truncate if exists
+            
+            channels is the ChannelList, used to look up channels by their code when loading results.
         """
+
+        # store
+        self.channels = channels
+        self.path = path
+        self.mode = mode
+
+        # mode 'c' must create a new index, so refuse if the path already exists
+        if mode in 'c' and os.path.exists(path) :
+            raise LogSearchError("Index already exists: %s" % (path, ))
         
         # mapping of { mode -> flags }
         mode_to_flag = {
             'r':    hype.Database.DBREADER,
             'w':    hype.Database.DBWRITER | hype.Database.DBCREAT,
             'a':    hype.Database.DBWRITER,
+            'c':    hype.Database.DBWRITER | hype.Database.DBCREAT,
             '*':    hype.Database.DBWRITER | hype.Database.DBCREAT | hype.Database.DBTRUNC,
         }
 
@@ -97,23 +111,29 @@
             doc.add_attr('@uri',        "%s/%s/%d" % (channel.id, date.strftime('%Y-%m-%d'), line.offset))
 
             # add channel id
-            doc.add_attr('@channel',    channel.id)
+            doc.add_attr('channel',     channel.id)
 
             # add type
-            doc.add_attr('@type',       str(line.type))
+            doc.add_attr('type',        str(line.type))
 
             # add UTC timestamp
-            doc.add_attr('@timestamp',  str(utc_timestamp))
+            doc.add_attr('timestamp',   str(utc_timestamp))
 
             # add source attribute?
             if line.source :
-                doc.add_attr('@source', str(line.source))
+                source_nickname, source_username, source_hostname, source_chanflags = line.source
+
+                # XXX: source_nickname may be None here; it is then passed to add_attr unconverted
+                if not source_nickname is None :
+                    source_nickname = str(source_nickname)
+
+                doc.add_attr('source_nickname', source_nickname)
             
-            # add data text
-            doc.add_text(line.data.encode('utf8'))
+            # add data
+            if line.data :
+                doc.add_text(line.data.encode('utf8'))
 
-            # put
-            # XXX: what does this flag mean?
+            # put; the PDCLEAN flag means "clean up dispensable regions of the overwritten document"
             if not self.db.put_doc(doc, hype.Database.PDCLEAN) :
                 raise Exeception("Index put_doc failed")
             
@@ -142,15 +162,14 @@
             doc = self.db.get_doc(doc_id, 0)
 
             # load the attributes/text
-            channel_id  = doc.attr('@channel')
-            type        = int(doc.attr('@type'))
-            timestamp   = datetime.datetime.fromtimestamp(int(doc.attr('@timestamp')), pytz.utc)
-            source      = doc.attr('@source')
-            data        = doc.cat_texts().decode('utf8')
+            channel         = self.channels.lookup(doc.attr('channel'))
+            type            = int(doc.attr('type'))
+            timestamp       = datetime.datetime.fromtimestamp(int(doc.attr('timestamp')), pytz.utc)
+            source_nickname = doc.attr('source_nickname')
+            message         = doc.cat_texts().decode('utf8')
 
             # build+yield to as LogLine
-            # XXX: ignore channel_id for now
-            yield log_line.LogLine(None, type, timestamp, source, data)
+            yield log_line.LogLine(channel, None, type, timestamp, (source_nickname, None, None, None), None, message)
     
     def search (self, options=None, channel=None, phrase=None, order=None, max=None, skip=None) :
         """