author | Tero Marttila <terom@fixme.fi> |
Sun, 08 Feb 2009 04:41:00 +0200 | |
changeset 48 | 7858b7b8ffe3 |
parent 46 | 185504387370 |
child 50 | f13cf27a360b |
permissions | -rw-r--r-- |
41 | 1 |
""" |
2 |
A source of IRC log files |
|
3 |
""" |
|
4 |
||
5 |
import codecs |
|
6 |
from datetime import date, datetime, timedelta |
|
7 |
import pytz |
|
8 |
||
9 |
# for SEEK_*, errno |
|
10 |
import os, errno |
|
11 |
||
12 |
class LogSource (object) :
    """
    A collection of IRC logs for a specific target in some format. Provides the possibility to read specific events
    """
    
    def get_latest (self, count) :
        """
        Yield the latest events, up to `count` of them.
        
        Abstract; subclasses must override this. The base implementation always raises NotImplementedError.
        """
        
        # XXX: fixed — this was the bare name `abstract`, which is not defined anywhere in this file and
        # would have raised a confusing NameError at runtime; raise the intended exception explicitly
        raise NotImplementedError("%s must implement get_latest" % type(self).__name__)
|
23 |
||
24 |
class LogFile (LogSource) :
    """
    A file containing LogEvents, stored as separator-delimited lines of encoded text
    """

    def __init__ (self, path, charset='utf-8', sep='\n') :
        """
        Open the file at the given path, which contains data of the given codec, as lines separated by the given separator
        """
        
        # store
        self.path = path
        self.charset = charset
        self.sep = sep
        
        # open in binary mode; lines are decoded explicitly in get_latest
        self.file = open(path, 'rb')
    
    def __iter__ (self) :
        """
        Yields a series of raw (undecoded) lines, as read from the top of the file
        """
        
        # seek to beginning
        self.file.seek(0)
        
        # iterate over lines
        return iter(self.file)
    
    def get_latest (self, count) :
        """
        Returns up to <count> decoded lines from the end of the file, or less, if the file doesn't contain that many lines.
        
        Reads the file backwards a block at a time, so only the tail of a large file is actually read.
        """
        
        # the list of complete raw lines found so far, oldest first
        lines = []
        
        # the separator as it appears in the raw (byte) data
        # NB: under py2 str is bytes, so the isinstance check leaves it untouched there
        sep = self.sep
        if not isinstance(sep, bytes) :
            sep = sep.encode(self.charset)
        
        # find the end of the file
        self.file.seek(0, os.SEEK_END)
        offset = self.file.tell()
        
        # use this blocksize
        BLOCKSIZE = 1024
        
        # trailing data not yet known to form a complete line
        buf = b''
        
        # read a block at a time, backwards; gather one extra line so a trailing-separator artifact
        # (the empty element after the final separator) can be dropped below without coming up short
        while offset > 0 and len(lines) < count + 1 :
            # calc new offset + size (partial block at the start of the file)
            read_size = min(BLOCKSIZE, offset)
            offset -= read_size
            
            # seek to offset and read the data we want
            self.file.seek(offset)
            read_buf = self.file.read(read_size)
            
            # sanity check — a short read means the file changed underneath us
            assert len(read_buf) == read_size
            
            # add in our previous buf
            buf = read_buf + buf
            
            # split out lines; the first element may still be incomplete
            buf_lines = buf.split(sep)
            
            # keep the first one as our buffer, as it's incomplete
            # XXX: fixed — the old slice `buf_lines[-min(count, len(buf_lines) - 1):]` became `[-0:]`
            # (the WHOLE list) whenever a block held no separator, duplicating the incomplete line
            buf = buf_lines[0]
            
            # the rest are complete lines; prepend them
            lines = buf_lines[1:] + lines
        
        # if we consumed the whole file, the pending buffer is the complete first line
        if offset == 0 and buf :
            lines.insert(0, buf)
        
        # a file ending with the separator yields one empty trailing element; drop it
        # XXX: fixed — the old code blindly did tell() - 1, which corrupted the last line of files
        # not ending in a separator, and broke on empty files
        if lines and not lines[-1] :
            lines = lines[:-1]
        
        # keep only the last <count> lines (the old code could return more than <count>)
        if count > 0 :
            lines = lines[-count:]
        else :
            lines = []
        
        # decode
        return [line.decode(self.charset) for line in lines]
|
115 |
||
116 |
class LogDirectory (LogSource) :
    """
    A directory containing a series of timestamped LogFiles
    """

    def __init__ (self, path, tz, charset='utf-8', filename_fmt='%Y-%m-%d') :
        """
        Load the logfiles at the given path.
        
        The files contain data in the given charset, and are named according the the date in the given timezone and
        date format.
        """
        
        # store
        self.path = path
        self.tz = tz
        self.charset = charset
        self.filename_fmt = filename_fmt
    
    def _get_logfile_datetime (self, dt) :
        """
        Get the logfile corresponding to the given datetime
        """
        
        # convert to target timezone
        dtz = dt.astimezone(self.tz)
        
        # convert to date and use that
        return self._get_logfile_date(dtz.date())
    
    def _get_logfile_date (self, d) :
        """
        Get the logfile corresponding to the given naive date in our timezone.
        
        Raises IOError (errno.ENOENT) from LogFile's open() if no logfile exists for that date.
        """
        
        # format filename
        filename = d.strftime(self.filename_fmt)
        
        # build path
        path = os.path.join(self.path, filename)
        
        # return the LogFile
        return LogFile(path, self.charset)
    
    def _iter_backwards (self, dt=None) :
        """
        Yields an infinite series of naive date objects in our timezone, iterating backwards in time starting at the
        given *datetime*, or the the current date, if none given
        """
        
        # default to now
        if dt is None :
            dt = datetime.now(pytz.utc)
        
        # convert to target timezone
        dtz = dt.astimezone(self.tz)
        
        # our timedelta
        ONE_DAY = timedelta(1)
        
        # iterate unto infinity
        while True :
            # yield
            yield dtz.date()
            
            # one day sdrawkcab
            dtz -= ONE_DAY
    
    def get_latest (self, count) :
        """
        Uses _iter_backwards + _get_logfile_date to read the given number of lines from as many logfiles as needed
        """
        
        # iterate backwards from now
        day_iter = self._iter_backwards()
        
        # number of files examined
        files = 0
        
        # only read up to 100 files or so
        MAX_FILES = 100
        
        # read the lines into here
        lines = []
        
        # loop until done
        while len(lines) < count :
            # XXX: fixed — this guard used to live only in the missing-file branch, so a long run of
            # existing-but-empty logfiles could loop without bound
            if files >= MAX_FILES :
                if lines :
                    # return what we found rather than discard it
                    break
                
                raise Exception("No recent logfiles found")
            
            # count this logfile against the limit
            files += 1
            
            try :
                # open the next (earlier) day's logfile
                # NB: next(...) instead of .next() — valid on both py2.6+ and py3
                logfile = self._get_logfile_date(next(day_iter))
            
            except IOError as e :
                # propagate anything other than a missing file
                if e.errno != errno.ENOENT :
                    raise
                
                # skip nonexistant days
                continue
            
            # prepend only as many older lines as we still need, so the result never exceeds <count>
            lines = logfile.get_latest(count - len(lines)) + lines
        
        # return the lines
        return lines
|
41 | 229 |