27 An index on the logs for a group of channels. |
28 An index on the logs for a group of channels. |
28 |
29 |
29 This uses Hyper Estraier to handle searching, whereby each log line is a document (yes, I have a powerful server). |
30 This uses Hyper Estraier to handle searching, whereby each log line is a document (yes, I have a powerful server). |
30 |
31 |
31 These log documents have the following attributes: |
32 These log documents have the following attributes: |
32 @uri - channel/date/line |
33 @uri - channel/date/line |
33 @channel - channel id |
34 channel - channel code |
34 @type - the LogType id |
35 type - the LogType id |
35 @timestamp - UTC timestamp |
36 timestamp - UTC timestamp |
36 @source - nickname |
37 source_nickname - source nickname |
37 |
38 |
38 Each document then has a single line of data, which is the log message itself |
39 Each document then has a single line of data, which is the log message itself |
39 """ |
40 """ |
40 |
41 |
41 def __init__ (self, path, mode='r') : |
42 def __init__ (self, channels, path, mode='r') : |
42 """ |
43 """ |
43 Open the database, with the given mode: |
44 Open the database at the given path, with the given mode: |
44 r - read-only |
45 r - read-only |
45 w - read-write, create if not exists |
46 w - write, create if not exists |
46 a - read-write, do not create |
47 a - write, error if not exists |
47 * - read-write, truncate and create new |
48 c - write, create, error if exists |
48 """ |
49 * - write, create, truncate if exists |
|
50 |
|
51 Channels is the ChannelList. |
|
52 """ |
|
53 |
|
54 # store |
|
55 self.channels = channels |
|
56 self.path = path |
|
57 self.mode = mode |
|
58 |
|
59 # check it does not already exist? |
|
60 if mode in 'c' and os.path.exists(path) : |
|
61 raise LogSearchError("Index already exists: %s" % (path, )) |
49 |
62 |
50 # mapping of { mode -> flags } |
63 # mapping of { mode -> flags } |
51 mode_to_flag = { |
64 mode_to_flag = { |
52 'r': hype.Database.DBREADER, |
65 'r': hype.Database.DBREADER, |
53 'w': hype.Database.DBWRITER | hype.Database.DBCREAT, |
66 'w': hype.Database.DBWRITER | hype.Database.DBCREAT, |
54 'a': hype.Database.DBWRITER, |
67 'a': hype.Database.DBWRITER, |
|
68 'c': hype.Database.DBWRITER | hype.Database.DBCREAT, |
55 '*': hype.Database.DBWRITER | hype.Database.DBCREAT | hype.Database.DBTRUNC, |
69 '*': hype.Database.DBWRITER | hype.Database.DBCREAT | hype.Database.DBTRUNC, |
56 } |
70 } |
57 |
71 |
58 # look up flags |
72 # look up flags |
59 flags = mode_to_flag[mode] |
73 flags = mode_to_flag[mode] |
95 |
109 |
96 # add URI |
110 # add URI |
97 doc.add_attr('@uri', "%s/%s/%d" % (channel.id, date.strftime('%Y-%m-%d'), line.offset)) |
111 doc.add_attr('@uri', "%s/%s/%d" % (channel.id, date.strftime('%Y-%m-%d'), line.offset)) |
98 |
112 |
99 # add channel id |
113 # add channel id |
100 doc.add_attr('@channel', channel.id) |
114 doc.add_attr('channel', channel.id) |
101 |
115 |
102 # add type |
116 # add type |
103 doc.add_attr('@type', str(line.type)) |
117 doc.add_attr('type', str(line.type)) |
104 |
118 |
105 # add UTC timestamp |
119 # add UTC timestamp |
106 doc.add_attr('@timestamp', str(utc_timestamp)) |
120 doc.add_attr('timestamp', str(utc_timestamp)) |
107 |
121 |
108 # add source attribute? |
122 # add source attribute? |
109 if line.source : |
123 if line.source : |
110 doc.add_attr('@source', str(line.source)) |
124 source_nickname, source_username, source_hostname, source_chanflags = line.source |
|
125 |
|
126 # XXX: handle the case where source_nickname is None |
|
127 if not source_nickname is None : |
|
128 source_nickname = str(source_nickname) |
|
129 |
|
130 doc.add_attr('source_nickname', source_nickname) |
111 |
131 |
112 # add data text |
132 # add data |
113 doc.add_text(line.data.encode('utf8')) |
133 if line.data : |
114 |
134 doc.add_text(line.data.encode('utf8')) |
115 # put |
135 |
116 # XXX: what does this flag mean? |
136 # put, "clean up dispensable regions of the overwritten document" |
117 if not self.db.put_doc(doc, hype.Database.PDCLEAN) : |
137 if not self.db.put_doc(doc, hype.Database.PDCLEAN) : |
118 raise Exeception("Index put_doc failed") |
138 raise Exeception("Index put_doc failed") |
119 |
139 |
120 # count |
140 # count |
121 count += 1 |
141 count += 1 |
140 # load document, this throws an exception... |
160 # load document, this throws an exception... |
141 # option constants are hype.Database.GDNOATTR/GDNOTEXT |
161 # option constants are hype.Database.GDNOATTR/GDNOTEXT |
142 doc = self.db.get_doc(doc_id, 0) |
162 doc = self.db.get_doc(doc_id, 0) |
143 |
163 |
144 # load the attributes/text |
164 # load the attributes/text |
145 channel_id = doc.attr('@channel') |
165 channel = self.channels.lookup(doc.attr('channel')) |
146 type = int(doc.attr('@type')) |
166 type = int(doc.attr('type')) |
147 timestamp = datetime.datetime.fromtimestamp(int(doc.attr('@timestamp')), pytz.utc) |
167 timestamp = datetime.datetime.fromtimestamp(int(doc.attr('timestamp')), pytz.utc) |
148 source = doc.attr('@source') |
168 source_nickname = doc.attr('source_nickname') |
149 data = doc.cat_texts().decode('utf8') |
169 message = doc.cat_texts().decode('utf8') |
150 |
170 |
151 # build and yield as a LogLine |
171 # build and yield as a LogLine |
152 # XXX: ignore channel_id for now |
172 yield log_line.LogLine(channel, None, type, timestamp, (source_nickname, None, None, None), None, message) |
153 yield log_line.LogLine(None, type, timestamp, source, data) |
|
154 |
173 |
155 def search (self, options=None, channel=None, phrase=None, order=None, max=None, skip=None) : |
174 def search (self, options=None, channel=None, phrase=None, order=None, max=None, skip=None) : |
156 """ |
175 """ |
157 Search with flexible parameters |
176 Search with flexible parameters |
158 |
177 |