28 An index on the logs for a group of channels. |
28 An index on the logs for a group of channels. |
29 |
29 |
30 This uses Hyper Estraier to handle searching, whereby each log line is a document (yes, I have a powerful server). |
30 This uses Hyper Estraier to handle searching, whereby each log line is a document (yes, I have a powerful server). |
31 |
31 |
32 These log documents have the following attributes: |
32 These log documents have the following attributes: |
33 @uri - channel/date/line |
33 @uri - channel/date/line |
34 channel - channel code |
34 channel - channel code |
35 type - the LogType id |
35 type - the LogType id |
36 timestamp - UTC timestamp |
36 timestamp - UTC timestamp |
37 source_nickname - source nickname |
37 source_nickname - source nickname |
38 |
38 source_username - source username |
39 Each document then has a single line of data, which is the log message itself |
39 source_hostname - source hostname |
|
40 source_chanflags - source channel flags |
|
41 target_nickname - target nickname |
|
42 |
|
43 Each document then has a single line of data, which is the log data message |
40 """ |
44 """ |
41 |
45 |
42 def __init__ (self, channels, path, mode='r') : |
46 def __init__ (self, channels, path, mode='r') : |
43 """ |
47 """ |
44 Open the database at the given path, with the given mode: |
48 Open the database at the given path, with the given mode: |
99 doc = hype.Document() |
103 doc = hype.Document() |
100 |
104 |
101 # line date |
105 # line date |
102 date = line.timestamp.date() |
106 date = line.timestamp.date() |
103 |
107 |
104 # convert to UTC timestamp |
|
105 utc_timestamp = calendar.timegm(line.timestamp.utctimetuple()) |
|
106 |
|
107 # ensure that it's not 1900 |
108 # ensure that it's not 1900 |
108 assert date.year != 1900 |
109 assert date.year != 1900 |
109 |
110 |
110 # add URI |
111 # add URI |
111 doc.add_attr('@uri', "%s/%s/%d" % (channel.id, date.strftime('%Y-%m-%d'), line.offset)) |
112 doc.add_attr('@uri', "%s/%s/%d" % (channel.id, date.strftime('%Y-%m-%d'), line.offset)) |
115 |
116 |
116 # add type |
117 # add type |
117 doc.add_attr('type', str(line.type)) |
118 doc.add_attr('type', str(line.type)) |
118 |
119 |
119 # add UTC timestamp |
120 # add UTC timestamp |
120 doc.add_attr('timestamp', str(utc_timestamp)) |
121 doc.add_attr('timestamp', str(utils.to_utc_timestamp(line.timestamp))) |
121 |
122 |
122 # add source attribute? |
123 # add source attribute? |
123 if line.source : |
124 if line.source : |
124 source_nickname, source_username, source_hostname, source_chanflags = line.source |
125 source_nickname, source_username, source_hostname, source_chanflags = line.source |
125 |
126 |
126 # XXX: handle source_nickname is None |
127 if source_nickname : |
127 if not source_nickname is None : |
128 doc.add_attr('source_nickname', source_nickname.encode('utf8')) |
128 source_nickname = str(source_nickname) |
129 |
129 |
130 if source_username : |
130 doc.add_attr('source_nickname', source_nickname) |
131 doc.add_attr('source_username', source_username.encode('utf8')) |
131 |
132 |
|
133 if source_hostname : |
|
134 doc.add_attr('source_hostname', source_hostname.encode('utf8')) |
|
135 |
|
136 if source_chanflags : |
|
137 doc.add_attr('source_chanflags', source_chanflags.encode('utf8')) |
|
138 |
|
139 # add target attributes? |
|
140 if line.target : |
|
141 target_nickname = line.target |
|
142 |
|
143 if target_nickname : |
|
144 doc.add_attr('target_nickname', target_nickname.encode('utf8')) |
|
145 |
132 # add data |
146 # add data |
133 if line.data : |
147 if line.data : |
134 doc.add_text(line.data.encode('utf8')) |
148 doc.add_text(line.data.encode('utf8')) |
135 |
149 |
136 # put, "clean up dispensable regions of the overwritten document" |
150 # put, "clean up dispensable regions of the overwritten document" |
162 doc = self.db.get_doc(doc_id, 0) |
176 doc = self.db.get_doc(doc_id, 0) |
163 |
177 |
164 # load the attributes/text |
178 # load the attributes/text |
165 channel = self.channels.lookup(doc.attr('channel')) |
179 channel = self.channels.lookup(doc.attr('channel')) |
166 type = int(doc.attr('type')) |
180 type = int(doc.attr('type')) |
167 timestamp = datetime.datetime.fromtimestamp(int(doc.attr('timestamp')), pytz.utc) |
181 timestamp = utils.from_utc_timestamp(int(doc.attr('timestamp'))) |
168 source_nickname = doc.attr('source_nickname') |
182 |
|
183 # source |
|
184 source = (doc.attr('source_nickname'), doc.attr('source_username'), doc.attr('source_hostname'), doc.attr('source_chanflags')) |
|
185 |
|
186 # target |
|
187 target = doc.attr('target_nickname') |
|
188 |
|
189 # message text |
169 message = doc.cat_texts().decode('utf8') |
190 message = doc.cat_texts().decode('utf8') |
170 |
191 |
171 # build+yield to as LogLine |
192 # build+yield to as LogLine |
172 yield log_line.LogLine(channel, None, type, timestamp, (source_nickname, None, None, None), None, message) |
193 yield log_line.LogLine(channel, None, type, timestamp, source, target, message) |
173 |
194 |
174 def search (self, options=None, channel=None, phrase=None, order=None, max=None, skip=None) : |
195 def search (self, options=None, channel=None, attrs=None, phrase=None, order=None, max=None, skip=None) : |
175 """ |
196 """ |
176 Search with flexible parameters |
197 Search with flexible parameters |
177 |
198 |
178 options - bitmask of hype.Condition.* |
199 options - bitmask of hype.Condition.* |
179 channel - LogChannel object |
200 channel - LogChannel object |
|
201 attrs - raw attribute expressions |
180 phrase - the search query phrase |
202 phrase - the search query phrase |
181 order - order attribute expression |
203 order - order attribute expression |
182 max - number of results to return |
204 max - number of results to return |
183 skip - number of results to skip |
205 skip - number of results to skip |
184 """ |
206 """ |
227 channel = channel, |
254 channel = channel, |
228 |
255 |
229 # given phrase |
256 # given phrase |
230 phrase = query, |
257 phrase = query, |
231 |
258 |
232 # order by timestamp |
259 # order by timestamp, descending (backwards) |
233 order = "@timestamp NUMD", |
260 order = "timestamp NUMD", |
234 |
261 |
235 # count/offset |
262 # count/offset |
236 max = count, |
263 max = count, |
237 skip = offset, |
264 skip = offset, |
238 )) |
265 )) |
239 |
266 |
240 # reverse |
267 # reverse |
241 return reversed(results) |
268 return reversed(results) |
242 |
269 |
|
def list (self, channel, date, count=None, skip=None) :
    """
        List all indexed log items for the given UTC date.

            channel - LogChannel object to list items for (passed through to search())
            date    - the day to list; only its year/month/day fields are used
            count   - maximum number of results to return (passed as search()'s `max`)
            skip    - number of results to skip (passed as search()'s `skip`)

        Returns the result of search() for that day's timestamp range, ordered by
        ascending timestamp.
    """

    # first and last instants of the requested day
    # NOTE(review): these are naive datetimes; presumably utils.to_utc_timestamp
    # interprets them as UTC, matching the "UTC date" contract above - confirm.
    dt_start = datetime.datetime(date.year, date.month, date.day, 0, 0, 0, 0)

    # 23:59:59.999999 - the end bound previously used minute=23, which cut the
    # range off at 23:23:59 and dropped the last ~36 minutes of every day
    dt_end = datetime.datetime(date.year, date.month, date.day, 23, 59, 59, 999999)

    # search
    return self.search(
        # specific channel
        channel = channel,

        # specific date range, as a numeric between-condition on the timestamp attribute
        attrs   = [
            "timestamp NUMBT %d %d" % (utils.to_utc_timestamp(dt_start), utils.to_utc_timestamp(dt_end))
        ],

        # order correctly: oldest first
        order   = "timestamp NUMA",

        # max count/offset
        max     = count,
        skip    = skip
    )
295 ) |