18 """ |
26 """ |
19 |
27 |
20 self.tz = tz |
28 self.tz = tz |
21 self.timestamp_fmt = timestamp_fmt |
29 self.timestamp_fmt = timestamp_fmt |
22 |
30 |
def parse_lines(self, channel, lines, date=None, starting_offset=None):
    """
        Parse the given (iterable) lines of unicode text into a LogEvent, no trailing newline.

        Channel is the LogChannel that these lines belong to.

        Offset is the starting offset, and may be None to not use it.

        Giving date lets the parser build full timestamps, otherwise, unless line timestamps have full date
        information, event timestamps will have a date component of 1900/1/1.

        This is the interface declaration only; concrete parsers override it.
    """

    # NOTE(review): `abstract` is not defined anywhere in the visible code; presumably evaluating
    # the bare name fails (NameError) so that calling an un-overridden method blows up -- confirm.
    abstract
34 |
44 |
35 |
|
36 class IrssiParser (LogParser) : |
45 class IrssiParser (LogParser) : |
37 """ |
46 """ |
38 A parser for irssi logfiles |
47 A parser for irssi logfiles |
39 """ |
48 """ |
40 |
49 |
41 def parse_lines (self, lines, date=None, starting_offset=None) : |
# Reusable named-group fragments shared by the per-type line patterns below.
# NOTE(review): whitespace inside these literals is reproduced exactly as it appears in the
# visible text; if the original used runs of spaces for irssi's column padding, confirm them.
_TS = r'(?P<timestamp>\S+)'
_NICK = r'(?P<nickname>.+?)'
_NICK2 = r'(?P<nickname2>.+?)'
_CHAN = r'(?P<channel>.+?)'
_USERHOST = r'(?P<username>.*?)@(?P<hostname>.*?)'
_MSG = r'(?P<message>.*)'

# (type, pattern) pairs for matching lines, by type.  Patterns are tried in order with
# re.match() and the first hit wins, so more specific patterns must come before looser ones.
TYPE_EXPRS = (
    (LogTypes.LOG_OPEN, r'--- Log opened (?P<datetime>.+)'),
    (LogTypes.LOG_CLOSE, r'--- Log closed (?P<datetime>.+)'),
    (LogTypes.MSG, _TS + r' <(?P<flags>.)' + _NICK + r'> ' + _MSG),
    (LogTypes.ACTION, _TS + r' \* ' + _NICK + r' ' + _MSG),

    # `-!-` status lines
    # JOIN ends in the lazy channel group; without the end anchor re.match() would stop
    # after a single character of the channel name.
    (LogTypes.JOIN, _TS + r' -!- ' + _NICK + r' \[' + _USERHOST + r'\] has joined ' + _CHAN + r'$'),
    (LogTypes.PART, _TS + r' -!- ' + _NICK + r' \[' + _USERHOST + r'\] has left ' + _CHAN + r' \[(?P<message>.*?)\]'),
    (LogTypes.KICK, _TS + r' -!- ' + _NICK2 + r' was kicked from ' + _CHAN + r' by ' + _NICK + r' \[(?P<message>.*?)\]'),
    (LogTypes.MODE, _TS + r' -!- mode/' + _CHAN + r' \[(?P<mode>.+?)\] by (?P<nickname>\S+)'),
    (LogTypes.NICK, _TS + r' -!- ' + _NICK + r' is now known as (?P<nickname2>\S+)'),
    (LogTypes.QUIT, _TS + r' -!- ' + _NICK + r' \[' + _USERHOST + r'\] has quit \[(?P<message>.*?)\]'),
    (LogTypes.TOPIC, _TS + r' -!- ' + _NICK + r' changed the topic of ' + _CHAN + r' to: (?P<topic>.*)'),

    (LogTypes.SELF_NOTICE, _TS + r' \[notice\(' + _CHAN + r'\)\] ' + _MSG),
    (LogTypes.SELF_NICK, _TS + r' -!- You\'re now known as (?P<nickname2>\S+)'),

    # NOTICE is deliberately last: its `-<nick>:<chan>- ` shape is loose enough to also match
    # a `-!-` status line whose text contains ": ... - " (e.g. a topic or quit message), so the
    # status patterns above must get the first chance.
    (LogTypes.NOTICE, _TS + r' -' + _NICK + r':' + _CHAN + r'- ' + _MSG),
)

# precompile
TYPE_REGEXES = [(line_type, re.compile(expr)) for line_type, expr in TYPE_EXPRS]
|
79 |
|
def parse_line(self, channel, line, date, offset=None):
    """
        Parse a single line, and return the resulting LogLine, or None, to ignore the line.

        Uses self.TYPE_REGEXES to do the matching: the (type, regex) pairs are tried in order and
        the first regex that matches decides the line's type.

        channel is handed to the LogLine unchanged.

        date, when given, replaces the year/month/day of a per-line timestamp (parsed using
        self.timestamp_fmt); "Log opened/closed" lines carry a full datetime and are not re-dated.

        offset is handed to the LogLine, and to LogParseError when parsing fails.

        Raises LogParseError if no regex matches, or if the matching regex has neither a
        `datetime` nor a `timestamp` group.
    """

    # empty line
    if not line:
        return None

    # test each type, stopping at the first regex that matches
    for line_type, regex in self.TYPE_REGEXES:
        match = regex.match(line)

        if match:
            break

    else:
        # ran out of candidates
        raise LogParseError(offset, line, "Line did not match any type")

    # match groups; only names defined by the matching pattern are present as keys
    groups = match.groupdict(None)

    # parse timestamp
    if 'datetime' in groups:
        # parse datetime using default asctime() format
        dt = datetime.datetime.strptime(groups['datetime'], '%a %b %d %H:%M:%S %Y')

    elif 'timestamp' in groups:
        # parse timestamp into naive datetime
        dt = datetime.datetime.strptime(groups['timestamp'], self.timestamp_fmt)

        # override date?
        if date:
            dt = dt.replace(year=date.year, month=date.month, day=date.day)

    else:
        # no timestamp !?
        raise LogParseError(offset, line, "No timestamp")

    # now localize with timezone
    # NOTE(review): presumably self.tz is a pytz-style timezone exposing localize() -- confirm
    dtz = self.tz.localize(dt)

    # source: (nickname, username, hostname, flags), None for parts the pattern does not define
    source = (groups.get('nickname'), groups.get('username'), groups.get('hostname'), groups.get('flags'))

    # target
    target = groups.get('nickname2')

    # data: the first payload group the pattern defines, in this order of preference
    for key in ('message', 'mode', 'topic'):
        if key in groups:
            data = groups[key]
            break

    else:
        data = None

    # build+return LogLine
    return LogLine(channel, offset, line_type, dtz, source, target, data)
|
151 |
|
# IrssiParser.parse_lines: walks `lines` with enumerate(), hands each line to self.parse_line()
# and yields every truthy result, which makes this method a generator.
# NOTE(review): this span interleaves two revisions (numbering runs 42-64 and 152-180). The
#   152-180 run adds the `channel` argument, re-raises LogParseError unchanged and wraps any
#   other exception in LogParseError; the 42-64 run raised a plain Exception instead.
# NOTE(review): the numbering jumps from 46 to 51 (and 157 to 162), so the `if` that the first
#   `else :` belongs to is not visible here; how `offset` is derived from `starting_offset`
#   cannot be confirmed from this view.
# NOTE(review): the wrapping raise passes (line, offset, message) while parse_line in this file
#   calls LogParseError(offset, line, message); the argument order looks swapped -- confirm
#   against the LogParseError constructor.
# When parse_line() raises, `line` has not been rebound to the parse result yet, so the wrapped
# error carries the raw input line.
152 def parse_lines (self, channel, lines, date=None, starting_offset=None) : |
42 """ |
153 """ |
43 Parse the given lines, yielding LogEvents. |
154 Parse the given lines, yielding LogEvents. |
44 """ |
155 """ |
45 |
156 |
46 for offset, line in enumerate(lines) : |
157 for offset, line in enumerate(lines) : |
51 else : |
162 else : |
52 offset = None |
163 offset = None |
53 |
164 |
54 # try and parse |
165 # try and parse |
55 try : |
166 try : |
56 line = self.parse_line(line, date, offset) |
167 line = self.parse_line(channel, line, date, offset) |
57 |
168 |

169 # passthrough LogParseError's |

170 except LogParseError : |

171 raise |

172 |

173 # wrap other errors as LogParseError |
58 except Exception, e : |
174 except Exception, e : |
59 raise Exception("Parsing line failed: %r@%d: %s" % (line, offset, e)) |
175 raise LogParseError(line, offset, "Parsing line failed: %s" % e) |
60 |
176 |
61 else : |
177 else : |
62 # yield unless None |
178 # yield unless None |
63 if line : |
179 if line : |
64 yield line |
180 yield line |
65 |
181 |
# Earlier revision of parse_line (numbering run 66-96), taking (line, date, offset) and no `channel`.
# Empty lines and lines starting with '---' return None.  Every other line is split at the first
# space into a timestamp and the remaining text; the timestamp is parsed with self.timestamp_fmt,
# re-dated from `date` when that is truthy, localized via self.tz.localize() and returned as a
# LogTypes.RAW LogLine whose data is the remaining text.
# NOTE(review): a non-empty line without any space makes the two-name unpacking of
#   line.split(' ', 1) fail with ValueError.
# NOTE(review): a parse_line taking `channel` and matching per-type regexes also appears earlier
#   in this file (numbering run 80-150); this version looks superseded -- confirm before
#   relying on it.
66 def parse_line (self, line, date, offset=None) : |

67 """ |

68 Parse a single line, and return the resulting LogLine, or None, to ignore the line |

69 """ |

70 |

71 # empty line |

72 if not line : |

73 return |

74 |
182 |
75 # status lines |

76 elif line.startswith('---') : |

77 # XXX: handle these |

78 return |

79 |

80 # normal lines |

81 else : |

82 # XXX: only parse timestamps for now |

83 timestamp, data = line.split(' ', 1) |

84 |

85 # parse timestamp into naive datetime |

86 dt = datetime.datetime.strptime(timestamp, self.timestamp_fmt) |

87 |

88 # override date? |

89 if date : |

90 dt = dt.replace(year=date.year, month=date.month, day=date.day) |

91 |

92 # now localize with timezone |

93 dtz = self.tz.localize(dt) |

94 |

95 # build raw event |

96 return log_line.LogLine(offset, LogTypes.RAW, dtz, None, data) |
|
97 |
|