--- a/log_search.py Tue Feb 10 23:00:11 2009 +0200
+++ b/log_search.py Tue Feb 10 23:59:37 2009 +0200
@@ -3,6 +3,7 @@
"""
import datetime, calendar, pytz
+import os.path
import HyperEstraier as hype
@@ -29,29 +30,42 @@
This uses Hyper Estraier to handle searching, whereby each log line is a document (yes, I have a powerful server).
These log documents have the following attributes:
- @uri - channel/date/line
- @channel - channel id
- @type - the LogType id
- @timestamp - UTC timestamp
- @source - nickname
+ @uri - channel/date/line
+ channel - channel code
+ type - the LogType id
+ timestamp - UTC timestamp
+ source_nickname - source nickname
Each document then has a single line of data, which is the log message itself
"""
- def __init__ (self, path, mode='r') :
+ def __init__ (self, channels, path, mode='r') :
"""
- Open the database, with the given mode:
+ Open the database at the given path, with the given mode:
r - read-only
- w - read-write, create if not exists
- a - read-write, do not create
- * - read-write, truncate and create new
+ w - write, create if not exists
+ a - write, error if not exists
+ c - write, create, error if exists
+ * - write, create, truncate if exists
+
+ channels is the ChannelList, used to look up the channel object for each document read back from the index.
"""
+
+ # store
+ self.channels = channels
+ self.path = path
+ self.mode = mode
+
+ # check it does not already exist?
+ if mode in 'c' and os.path.exists(path) :
+ raise LogSearchError("Index already exists: %s" % (path, ))
# mapping of { mode -> flags }
mode_to_flag = {
'r': hype.Database.DBREADER,
'w': hype.Database.DBWRITER | hype.Database.DBCREAT,
'a': hype.Database.DBWRITER,
+ 'c': hype.Database.DBWRITER | hype.Database.DBCREAT,
'*': hype.Database.DBWRITER | hype.Database.DBCREAT | hype.Database.DBTRUNC,
}
@@ -97,23 +111,29 @@
doc.add_attr('@uri', "%s/%s/%d" % (channel.id, date.strftime('%Y-%m-%d'), line.offset))
# add channel id
- doc.add_attr('@channel', channel.id)
+ doc.add_attr('channel', channel.id)
# add type
- doc.add_attr('@type', str(line.type))
+ doc.add_attr('type', str(line.type))
# add UTC timestamp
- doc.add_attr('@timestamp', str(utc_timestamp))
+ doc.add_attr('timestamp', str(utc_timestamp))
# add source attribute?
if line.source :
- doc.add_attr('@source', str(line.source))
+ source_nickname, source_username, source_hostname, source_chanflags = line.source
+
+ # XXX: handle the case where source_nickname is None
+ if not source_nickname is None :
+ source_nickname = str(source_nickname)
+
+ doc.add_attr('source_nickname', source_nickname)
- # add data text
- doc.add_text(line.data.encode('utf8'))
+ # add data
+ if line.data :
+ doc.add_text(line.data.encode('utf8'))
- # put
- # XXX: what does this flag mean?
+ # put; the PDCLEAN flag means "clean up dispensable regions of the overwritten document"
if not self.db.put_doc(doc, hype.Database.PDCLEAN) :
raise Exeception("Index put_doc failed")
@@ -142,15 +162,14 @@
doc = self.db.get_doc(doc_id, 0)
# load the attributes/text
- channel_id = doc.attr('@channel')
- type = int(doc.attr('@type'))
- timestamp = datetime.datetime.fromtimestamp(int(doc.attr('@timestamp')), pytz.utc)
- source = doc.attr('@source')
- data = doc.cat_texts().decode('utf8')
+ channel = self.channels.lookup(doc.attr('channel'))
+ type = int(doc.attr('type'))
+ timestamp = datetime.datetime.fromtimestamp(int(doc.attr('timestamp')), pytz.utc)
+ source_nickname = doc.attr('source_nickname')
+ message = doc.cat_texts().decode('utf8')
# build+yield to as LogLine
- # XXX: ignore channel_id for now
- yield log_line.LogLine(None, type, timestamp, source, data)
+ yield log_line.LogLine(channel, None, type, timestamp, (source_nickname, None, None, None), None, message)
def search (self, options=None, channel=None, phrase=None, order=None, max=None, skip=None) :
"""