1 import datetime, time |
1 import datetime, time |
2 import re |
2 import re |
3 |
3 |
4 import logging; log = logging.getLogger('pvl.syslog.parser') |
4 import logging; log = logging.getLogger('pvl.syslog.parser') |
5 |
5 |
|
# date-time (to whole seconds), optional fractional seconds, optional UTC offset
RFC3339_RE = re.compile(r'(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})(\.\d+)?(Z|[+-]\d{2}:\d{2})?')
RFC3339_FMT = '%Y-%m-%dT%H:%M:%S'

def rfc3339 (timestamp) :
    """
        RFC3339 timestamps as used in some syslog implementations.

        Only the leading date-time part (to whole seconds) is converted. An
        optional fractional-seconds part and an optional UTC offset ('Z' or
        +HH:MM / -HH:MM) are accepted by the pattern, but both are ignored.

        Returns a naive datetime holding the wall-clock fields exactly as
        written in the timestamp (i.e. in whatever timezone the sender used),
        or None if the string does not start with an RFC3339 date-time.

        Raises ValueError if the matched fields do not form a valid date/time
        (e.g. month 13).
    """

    match = RFC3339_RE.match(timestamp)

    if not match :
        return None

    # TODO: timezone?
    #   The offset is match.group(3); group(2) is the fractional seconds.
    #   It is deliberately not applied here, so the result is not normalized
    #   to UTC or to localtime.
    return datetime.datetime.strptime(match.group(1), RFC3339_FMT)
|
49 |
|
# "Mmm dd hh:mm:ss", where a single-digit day is padded with a space
RFC3164_RE = re.compile(r'\w{3} [0-9 ][0-9] \d{2}:\d{2}:\d{2}')
RFC3164_FMT = '%b %d %H:%M:%S'
RFC3164_PRE = '%Y ' # add missing year, assuming current

def rfc3164 (timestamp) :
    """
        Traditional BSD Syslog timestamps.

        The format carries no year, so the current year (per time.strftime)
        is prepended before parsing.

        Returns a datetime assumed to be in localtime, or None if the string
        does not start with something shaped like a BSD syslog timestamp.

        Raises ValueError if the string matches the pattern but cannot be
        parsed, e.g. an unknown month abbreviation, an out-of-range field, or
        trailing text after the timestamp (the pattern only anchors the start).
    """

    if not RFC3164_RE.match(timestamp) :
        # explicit None, same as rfc3339() for unrecognized input
        return None

    # render the year prefix through the same format string it is parsed with
    return datetime.datetime.strptime(time.strftime(RFC3164_PRE) + timestamp, RFC3164_PRE + RFC3164_FMT)
|
65 |
6 class SyslogParser (object) : |
66 class SyslogParser (object) : |
7 """ |
67 """ |
8 Parse syslog lines in text format, as used in logfiles/fifos. |
68 Parse syslog lines in text format, as used in logfiles/fifos. |
9 """ |
69 """ |
10 |
70 |
43 'local4', # 20 |
103 'local4', # 20 |
44 'local5', # 21 |
104 'local5', # 21 |
45 'local6', # 22 |
105 'local6', # 22 |
46 'local7', # 23 |
106 'local7', # 23 |
47 ))) |
107 ))) |
48 |
108 |
49 # default syslogd format |
109 # default syslogd format |
50 SYSLOG_RE = re.compile( |
110 SYSLOG_RE = re.compile( |
51 # the timestamp+hostname header |
111 # the timestamp+hostname header |
52 # XXX: hostname may be missing |
112 # XXX: hostname may be missing |
53 # at least in Ubuntu 11.10 syslogd 'last message repeated 2 times'... |
113 # at least in Ubuntu 11.10 syslogd 'last message repeated 2 times'... |
54 r'(?:<(?P<pri>\d+|(?P<facility>\w+)\.(?P<severity>\w+))>)?' |
114 r'(?:<(?P<pri>\d+|(?P<facility>\w+)\.(?P<severity>\w+))>)?' |
55 + r'(?P<timestamp>\w{3} [0-9 ][0-9] \d{2}:\d{2}:\d{2}) (?P<hostname>\S+)? ' |
115 + r'(?P<timestamp>\w{3} [0-9 ][0-9] \d{2}:\d{2}:\d{2}|.+?) ' |
|
116 + r'(?P<hostname>\S+)? ' |
56 |
117 |
57 # the message, including possible tag/pid |
118 # the message, including possible tag/pid |
58 + r"(?P<message>(?P<tag>(?P<program>[^:\]]+)(?:\[(?P<pid>\d+)\])?: )?(?P<text>.*))\n?" |
119 + r"(?P<message>(?P<tag>(?P<program>[^:\]]+)(?:\[(?P<pid>\d+)\])?: )?(?P<text>.*))\n?" |
59 ) |
120 ) |
60 |
|
61 TIMESTAMP_FMT = '%b %d %H:%M:%S' |
|
62 |
121 |
63 def __init__ (self, raw=False) : |
122 def __init__ (self, raw=False) : |
64 """ |
123 """ |
65 Using given underlying line source. |
124 Using given underlying line source. |
66 """ |
125 """ |
90 """ |
149 """ |
91 Parse timstamp from line into datetime. |
150 Parse timstamp from line into datetime. |
92 """ |
151 """ |
93 |
152 |
94 timestamp = match.group('timestamp') |
153 timestamp = match.group('timestamp') |
95 |
154 |
96 # add missing year; assume current |
155 # timestamp, in various formats |
97 timestamp = time.strftime('%Y') + ' ' + timestamp |
156 try : |
98 |
157 return rfc3164(timestamp) or rfc3339(timestamp) |
99 # k |
158 |
100 timestamp = datetime.datetime.strptime(timestamp, '%Y ' + self.TIMESTAMP_FMT) |
159 except ValueError as ex: |
101 |
160 # skip it |
102 return timestamp |
161 log.warning("timestamp: %s:", timestamp, exc_info=ex) |
|
162 return None |
103 |
163 |
104 def parse_prog (self, match) : |
164 def parse_prog (self, match) : |
105 """ |
165 """ |
106 Parse prog from line. |
166 Parse prog from line. |
107 """ |
167 """ |
150 host = match.group('hostname'), |
210 host = match.group('hostname'), |
151 prog = self.parse_prog(match), |
211 prog = self.parse_prog(match), |
152 pid = match.group('pid'), |
212 pid = match.group('pid'), |
153 msg = match.group('text'), |
213 msg = match.group('text'), |
154 ) |
214 ) |
155 |
215 |
156 # facility/severity prefix? |
216 # facility/severity prefix? |
157 if match.group('pri') : |
217 if match.group('pri') : |
158 item.update(self.parse_pri(match)) |
218 item.update(self.parse_pri(match)) |
159 |
219 |
160 return item |
220 return item |