18 """ |
15 """ |
19 Yield the latest events, up to `count` of them. |
16 Yield the latest events, up to `count` of them. |
20 """ |
17 """ |
21 |
18 |
22 abstract |
19 abstract |
|
20 |
|
21 def get_date (self, dt) : |
|
22 """ |
|
23 Get logs for the given date (as a datetime) |
|
24 """ |
|
25 |
|
26 abstract |
23 |
27 |
24 class LogFile (LogSource) : |
28 class LogFile (LogSource) : |
25 """ |
29 """ |
26 A file containing LogEvents |
30 A file containing LogEvents |
27 """ |
31 """ |
28 |
32 |
29 def __init__ (self, path, charset='utf-8', sep='\n') : |
33 def __init__ (self, path, parser, start_date=None, charset='utf-8', sep='\n') : |
30 """ |
34 """ |
31 Open the file at the given path, which contains data of the given codec, as lines separated by the given separator |
35 Open the file at the given path, which contains data with the given charset, as lines separated by the |
|
36 given separator. Lines are parsed using the given parser, using the given date as an initial date, see |
|
37 LogParser for more info. XXX: currently we assume start_date also for the end of the file |
32 """ |
38 """ |
33 |
39 |
34 # store |
40 # store |
35 self.path = path |
41 self.path = path |
|
42 self.parser = parser |
|
43 self.start_date = start_date |
36 self.charset = charset |
44 self.charset = charset |
37 self.sep = sep |
45 self.sep = sep |
38 |
46 |
39 # open |
47 # open |
40 self.file = open(path, 'rb') |
48 self.file = open(path, 'rb') |
41 |
49 |
42 def __iter__ (self) : |
50 def __iter__ (self) : |
43 """ |
51 """ |
44 Yields a series of lines, as read from the top of the file |
52 Yields a series of unicode lines, as read from the top of the file |
45 """ |
53 """ |
46 |
54 |
47 # seek to beginning |
55 # seek to beginning |
48 self.file.seek(0) |
56 self.file.seek(0) |
49 |
57 |
50 # iterate over lines |
58 # iterate over lines, decoding them as well |
51 return iter(self.file) |
59 return (line.decode(self.charset) for line in self.file) |
52 |
60 |
53 def get_latest (self, count) : |
61 def read_full (self) : |
54 """ |
62 """ |
55 Returns up to <count> lines from the end of the file, or less, if the file doesn't contain that many lines |
63 Reads all LogLines |
56 """ |
64 """ |
57 |
65 |
58 # the list of lines |
66 # just use our __iter__ |
59 lines = [] |
67 return self.parser.parse_lines(self, self.start_date) |
|
68 |
|
69 def read_from (self, dt) : |
|
70 """ |
|
71 Reads all LogLines from the given naive timestamp onwards |
|
72 """ |
|
73 |
|
74 # start reading at beginning |
|
75 events = self.read_full() |
|
76 |
|
77 # skip unwanted events |
|
78 for event in events : |
|
79 if event.timestamp < dt : |
|
80 continue |
|
81 |
|
82 else : |
|
83 # include this line as well |
|
84 yield event |
|
85 break |
|
86 |
|
87 # yield the rest as-is |
|
88 for event in events : |
|
89 yield event |
|
90 |
|
91 def read_until (self, dt) : |
|
92 """ |
|
93 Reads all LogLines up until the given naive timestamp |
|
94 """ |
|
95 |
|
96 # start reading events at the beginning |
|
97 events = self.read_full() |
|
98 |
|
99 # yield events until we hit the given timestamp |
|
100 for event in events : |
|
101 if event.timestamp <= dt : |
|
102 yield event |
|
103 |
|
104 else : |
|
105 break |
|
106 |
|
107 # ignore the rest |
|
108 return |
|
109 |
|
110 def _read_blocks_reverse (self, blocksize=1024) : |
|
111 """ |
|
112 Yields blocks of file data in reverse order, starting at the end of the file |
|
113 """ |
60 |
114 |
61 # seek to end of file |
115 # seek to end of file |
62 self.file.seek(0, os.SEEK_END) |
116 self.file.seek(0, os.SEEK_END) |
63 |
117 |
64 # read offset |
118 # read offset |
65 # XXX: hack -1 to get rid of trailing newline |
119 # XXX: hack -1 to get rid of trailing newline |
66 size = offset = self.file.tell() - 1 |
120 size = offset = self.file.tell() - 1 |
67 |
121 |
68 # use this blocksize |
122 # do not try to read past the beginning of the file |
69 BLOCKSIZE = 1024 |
123 while offset > 0: |
70 |
|
71 # trailing data |
|
72 buf = '' |
|
73 |
|
74 # read a block at a time, backwards |
|
75 while len(lines) < count and offset > 0: |
|
76 # calc new offset + size |
124 # calc new offset + size |
77 if offset > BLOCKSIZE : |
125 if offset > blocksize : |
78 # full block |
126 # full block |
79 offset -= BLOCKSIZE |
127 offset -= blocksize |
80 read_size = BLOCKSIZE |
128 read_size = blocksize |
81 |
129 |
82 else : |
130 else : |
83 # partial block |
131 # partial block |
84 read_size = offset |
132 read_size = offset |
85 offset = 0 |
133 offset = 0 |
86 |
134 |
87 # seek to offset |
135 # seek to offset |
88 self.file.seek(offset) |
136 self.file.seek(offset) |
89 |
137 |
90 # read the data we want |
138 # read the data we want |
91 read_buf = self.file.read(read_size) |
139 block = self.file.read(read_size) |
92 read_len = len(read_buf) |
|
93 |
140 |
94 # sanity check |
141 # sanity check |
95 assert read_len == read_size |
142 assert len(block) == read_size |
96 |
143 |
|
144 # yield |
|
145 yield block |
|
146 |
|
147 def _read_lines_reverse (self) : |
|
148 """ |
|
149 Yields decoded lines from the end of the file, in reverse order. |
|
150 """ |
|
151 |
|
152 # partial lines |
|
153 buf = '' |
|
154 |
|
155 # read from end of file, a block at a time |
|
156 for block in self._read_blocks_reverse() : |
97 # add in our previous buf |
157 # add in our previous buf |
98 buf = read_buf + buf |
158 buf = block + buf |
99 |
159 |
100 # split out lines |
160 # split up lines |
101 buf_lines = buf.split(self.sep) |
161 lines = buf.split(self.sep) |
102 |
162 |
103 # keep the first one as our buffer, as it's incomplete |
163 # keep the first one as our buffer, as it's incomplete |
104 buf = buf_lines[0] |
164 buf = lines[0] |
105 |
165 |
106 # prepend up to count lines from the end to our lines buffer |
166 # yield the rest a line at a time in reverse order... this looks weird, but that's how slicing works :) |
107 lines = buf_lines[-min(count, len(buf_lines) - 1):] + lines |
167 # XXX: use something like islice, this has to build a slice object |
108 |
168 for line in lines[:0:-1] : |
109 # decode |
169 yield line.decode(self.charset) |
110 # XXX: better queue implementation, plz |
170 |
111 lines = [line.decode(self.charset) for line in lines] |
171 def get_latest (self, count) : |
112 |
172 """ |
113 # return the line list |
173 Returns up to count events, from the end of the file, or less, if the file doesn't contain that many lines. |
114 return lines |
174 """ |
|
175 |
|
176 # the list of lines |
|
177 lines = [] |
|
178 |
|
179 # start reading lines into lines |
|
180 for line in self._read_lines_reverse() : |
|
181 # append |
|
182 lines.append(line) |
|
183 |
|
184 # done? |
|
185 if len(lines) >= count : |
|
186 break |
|
187 |
|
188 # decode in reverse order, using our starting date.... |
|
189 # XXX: use lines[::-1] or reversed? |
|
190 # XXX: it may make more sense to parse in reverse order, using 'self.end_date' or something like that |
|
191 return self.parser.parse_lines(reversed(lines), self.start_date) |
115 |
192 |
116 class LogDirectory (LogSource) : |
193 class LogDirectory (LogSource) : |
117 """ |
194 """ |
118 A directory containing a series of timestamped LogFiles |
195 A directory containing a series of timestamped LogFiles |
119 """ |
196 """ |
120 |
197 |
121 def __init__ (self, path, tz, charset='utf-8', filename_fmt='%Y-%m-%d') : |
198 def __init__ (self, path, tz, parser, charset='utf-8', filename_fmt='%Y-%m-%d') : |
122 """ |
199 """ |
123 Load the logfiles at the given path. |
200 Load the logfiles at the given path. |
124 |
201 |
125 The files contain data in the given charset, and are named according to the date in the given timezone and |
202 The files contain data in the given charset, and are named according to the date in the given timezone and |
126 date format. |
203 date format, and will be parsed using the given parser. |
127 """ |
204 """ |
128 |
205 |
129 # store |
206 # store |
130 self.path = path |
207 self.path = path |
131 self.tz = tz |
208 self.tz = tz |
|
209 self.parser = parser |
132 self.charset = charset |
210 self.charset = charset |
133 self.filename_fmt = filename_fmt |
211 self.filename_fmt = filename_fmt |
134 |
212 |
135 def _get_logfile_datetime (self, dt) : |
213 def _get_logfile_datetime (self, dt) : |
136 """ |
214 """ |